[llvm] ee0bf64 - [InstCombine] try to fold mul by neg-power-of-2 to shl

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 21 12:17:30 PDT 2022


Author: Sanjay Patel
Date: 2022-09-21T15:09:39-04:00
New Revision: ee0bf6472291bd40687046ab2990f987a022a940

URL: https://github.com/llvm/llvm-project/commit/ee0bf6472291bd40687046ab2990f987a022a940
DIFF: https://github.com/llvm/llvm-project/commit/ee0bf6472291bd40687046ab2990f987a022a940.diff

LOG: [InstCombine] try to fold mul by neg-power-of-2 to shl

`(A * -2**C) + B --> B - (A << C)`

https://alive2.llvm.org/ce/z/A6BWkf

This inverts what Negator was doing before:
D134310 / 0f32a5dea0e9

Analysis and codegen are generally better without multiply,
so we should favor this form even if we trade add for sub
(because those are generally equivalent cost operations).

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/add.ll
    llvm/test/Transforms/InstCombine/neg-alloca.ll
    llvm/test/Transforms/Reassociate/basictest.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index af72e8af75ef3..70b216d3b3641 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1433,6 +1433,15 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     return BinaryOperator::CreateAdd(NewMul, ConstantInt::getAllOnesValue(Ty));
   }
 
+  // (A * -2**C) + B --> B - (A << C)
+  const APInt *NegPow2C;
+  if (match(&I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_NegatedPower2(NegPow2C))),
+                        m_Value(B)))) {
+    Constant *ShiftAmtC = ConstantInt::get(Ty, NegPow2C->countTrailingZeros());
+    Value *Shl = Builder.CreateShl(A, ShiftAmtC);
+    return BinaryOperator::CreateSub(B, Shl);
+  }
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.

diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 90aaa4a1f82fa..c8bb8cc3ab365 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -2180,8 +2180,8 @@ define i8 @add_select_sub_both_arms_simplify_use2(i1 %b, i8 %a) {
 
 define i5 @demand_low_bits_uses(i8 %x, i8 %y) {
 ; CHECK-LABEL: @demand_low_bits_uses(
-; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[M]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    call void @use(i8 [[A]])
 ; CHECK-NEXT:    [[R:%.*]] = trunc i8 [[Y]] to i5
 ; CHECK-NEXT:    ret i5 [[R]]
@@ -2197,8 +2197,8 @@ define i5 @demand_low_bits_uses(i8 %x, i8 %y) {
 
 define i6 @demand_low_bits_uses_extra_bit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @demand_low_bits_uses_extra_bit(
-; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[M]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    call void @use(i8 [[A]])
 ; CHECK-NEXT:    [[R:%.*]] = trunc i8 [[A]] to i6
 ; CHECK-NEXT:    ret i6 [[R]]
@@ -2257,11 +2257,11 @@ define { i64, i64 } @PR57576(i64 noundef %x, i64 noundef %y, i64 noundef %z, i64
 ; CHECK-NEXT:    [[ZW:%.*]] = zext i64 [[W:%.*]] to i128
 ; CHECK-NEXT:    [[ZZ:%.*]] = zext i64 [[Z:%.*]] to i128
 ; CHECK-NEXT:    [[SHY:%.*]] = shl nuw i128 [[ZY]], 64
-; CHECK-NEXT:    [[MW:%.*]] = mul i128 [[ZW]], -18446744073709551616
 ; CHECK-NEXT:    [[XY:%.*]] = or i128 [[SHY]], [[ZX]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i128 [[XY]], [[ZZ]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i128 [[SUB]], [[MW]]
-; CHECK-NEXT:    [[T:%.*]] = trunc i128 [[SUB]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i128 [[ZW]], 64
+; CHECK-NEXT:    [[TMP2:%.*]] = or i128 [[TMP1]], [[ZZ]]
+; CHECK-NEXT:    [[ADD:%.*]] = sub i128 [[XY]], [[TMP2]]
+; CHECK-NEXT:    [[T:%.*]] = trunc i128 [[ADD]] to i64
 ; CHECK-NEXT:    [[H:%.*]] = lshr i128 [[ADD]], 64
 ; CHECK-NEXT:    [[T2:%.*]] = trunc i128 [[H]] to i64
 ; CHECK-NEXT:    [[R1:%.*]] = insertvalue { i64, i64 } poison, i64 [[T]], 0
@@ -2287,8 +2287,8 @@ define { i64, i64 } @PR57576(i64 noundef %x, i64 noundef %y, i64 noundef %z, i64
 
 define i8 @mul_negpow2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_negpow2(
-; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], -2
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[M]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[A:%.*]] = sub i8 [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %m = mul i8 %x, -2
@@ -2299,8 +2299,8 @@ define i8 @mul_negpow2(i8 %x, i8 %y) {
 define <2 x i8> @mul_negpow2_commute_vec(<2 x i8> %x, <2 x i8> %p) {
 ; CHECK-LABEL: @mul_negpow2_commute_vec(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[M:%.*]] = mul <2 x i8> [[X:%.*]], <i8 -8, i8 -8>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[Y]], [[M]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[A:%.*]] = sub <2 x i8> [[Y]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
   %y = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization
@@ -2309,6 +2309,8 @@ define <2 x i8> @mul_negpow2_commute_vec(<2 x i8> %x, <2 x i8> %p) {
   ret <2 x i8> %a
 }
 
+; negative test - extra use
+
 define i8 @mul_negpow2_use(i8 %x) {
 ; CHECK-LABEL: @mul_negpow2_use(
 ; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], -2
@@ -2322,6 +2324,8 @@ define i8 @mul_negpow2_use(i8 %x) {
   ret i8 %a
 }
 
+; negative test - not negative-power-of-2 multiplier
+
 define i8 @mul_not_negpow2(i8 %x) {
 ; CHECK-LABEL: @mul_not_negpow2(
 ; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], -3

diff --git a/llvm/test/Transforms/InstCombine/neg-alloca.ll b/llvm/test/Transforms/InstCombine/neg-alloca.ll
index 13e5afc5dade5..f3effe1022793 100644
--- a/llvm/test/Transforms/InstCombine/neg-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/neg-alloca.ll
@@ -5,8 +5,8 @@ declare void @use(i32 *)
 define void @foo(i64 %X) {
 ; Currently we cannot handle expressions of the form Offset - X * Scale.
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i64 [[X:%.*]], -4
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 24, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = alloca i8, i64 [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32*
 ; CHECK-NEXT:    call void @use(i32* nonnull [[TMP4]])

diff --git a/llvm/test/Transforms/Reassociate/basictest.ll b/llvm/test/Transforms/Reassociate/basictest.ll
index 0a1722d439b86..56c8c8651f08c 100644
--- a/llvm/test/Transforms/Reassociate/basictest.ll
+++ b/llvm/test/Transforms/Reassociate/basictest.ll
@@ -259,8 +259,8 @@ define i64 @test16(i1 %cmp, i64 %a, i64 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[FACTOR:%.*]] = mul i64 [[A:%.*]], -4
-; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[FACTOR]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[A:%.*]], 2
+; CHECK-NEXT:    [[ADD2:%.*]] = sub i64 [[B:%.*]], [[TMP0]]
 ; CHECK-NEXT:    ret i64 [[ADD2]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i64 0


        


More information about the llvm-commits mailing list