[llvm] 0cfc651 - [InstCombine] ease use constraint in tryFactorization()

Wed Aug 24 09:11:04 PDT 2022

Author: Sanjay Patel
Date: 2022-08-24T12:10:54-04:00
New Revision: 0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c

URL: https://github.com/llvm/llvm-project/commit/0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c
DIFF: https://github.com/llvm/llvm-project/commit/0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c.diff

LOG: [InstCombine] ease use constraint in tryFactorization()

The stronger one-use checks prevented transforms like this:
(x * y) + x --> x * (y + 1)
(x * y) - x --> x * (y - 1)

https://alive2.llvm.org/ce/z/eMhvQa

This is one of the IR transforms suggested in issue #57255.

This should be better in IR because it removes a use of a
variable operand (we already fold the case with a constant
multiply operand).
The backend should be able to re-distribute the multiply if
that's better for the target.

Differential Revision: https://reviews.llvm.org/D132412

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/add.ll
    llvm/test/Transforms/InstCombine/and-or.ll
    llvm/test/Transforms/InstCombine/ctpop.ll
    llvm/test/Transforms/InstCombine/sub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0fd21ad635b8..07f8c689acf1 100644

--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -657,9 +657,9 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
       // If "B op D" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
 
-      // If "B op D" doesn't simplify then only go on if both of the existing
+      // If "B op D" doesn't simplify then only go on if one of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
         V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
       if (V)
         RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
@@ -677,9 +677,9 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
       // If "A op C" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
 
-      // If "A op C" doesn't simplify then only go on if both of the existing
+      // If "A op C" doesn't simplify then only go on if one of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
         V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
       if (V)
         RetVal = Builder.CreateBinOp(InnerOpcode, V, B);

diff  --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 1289ebe29935..68b394f1d81a 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -1756,10 +1756,12 @@ define i32 @add_add_add_commute3(i32 %A, i32 %B, i32 %C, i32 %D) {
   ret i32 %G
 }
 
+; x * y + x --> (y + 1) * x
+
 define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_add_common_factor_commute1(
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[M]], [[X]]
+; CHECK-NEXT:    [[X1:%.*]] = add i8 [[Y:%.*]], 1
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[X1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %m = mul nsw i8 %x, %y
@@ -1769,8 +1771,8 @@ define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) {
 
 define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @mul_add_common_factor_commute2(
-; CHECK-NEXT:    [[M:%.*]] = mul nuw <2 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add nuw <2 x i8> [[M]], [[X]]
+; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[A:%.*]] = mul nuw <2 x i8> [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
   %m = mul nuw <2 x i8> %y, %x
@@ -1781,8 +1783,8 @@ define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
 define i8 @mul_add_common_factor_commute3(i8 %p, i8 %y) {
 ; CHECK-LABEL: @mul_add_common_factor_commute3(
 ; CHECK-NEXT:    [[X:%.*]] = mul i8 [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[M:%.*]] = mul nuw i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X]], [[M]]
+; CHECK-NEXT:    [[M1:%.*]] = add i8 [[Y:%.*]], 1
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[X]], [[M1]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %x = mul i8 %p, %p ; thwart complexity-based canonicalization
@@ -1795,8 +1797,8 @@ define i8 @mul_add_common_factor_commute4(i8 %p, i8 %q) {
 ; CHECK-LABEL: @mul_add_common_factor_commute4(
 ; CHECK-NEXT:    [[X:%.*]] = mul i8 [[P:%.*]], [[P]]
 ; CHECK-NEXT:    [[Y:%.*]] = mul i8 [[Q:%.*]], [[Q]]
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i8 [[Y]], [[X]]
-; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X]], [[M]]
+; CHECK-NEXT:    [[M1:%.*]] = add i8 [[Y]], 1
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[X]], [[M1]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %x = mul i8 %p, %p ; thwart complexity-based canonicalization
@@ -1806,6 +1808,8 @@ define i8 @mul_add_common_factor_commute4(i8 %p, i8 %q) {
   ret i8 %a
 }
 
+; negative test - uses
+
 define i8 @mul_add_common_factor_use(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_add_common_factor_use(
 ; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]]

diff  --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
index 2b1d4539afa1..7a37af406f9d 100644
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -671,11 +671,11 @@ define i32 @or_or_and_noOneUse_fail1(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[A:%.*]], 23
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHR]], 157
 ; CHECK-NEXT:    call void @use2(i32 [[AND]])
-; CHECK-NEXT:    [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]]
+; CHECK-NEXT:    [[AND1:%.*]] = or i32 [[B:%.*]], 157
+; CHECK-NEXT:    [[OR:%.*]] = and i32 [[SHR]], [[AND1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[B]], 23
 ; CHECK-NEXT:    [[AND9:%.*]] = and i32 [[TMP1]], 157
-; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[AND3]], [[AND9]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[TMP2]], [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = or i32 [[OR]], [[AND9]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shr = ashr i32 %a, 23
@@ -700,9 +700,9 @@ define { i1, i1, i1, i1, i1 } @or_or_and_noOneUse_fail2(i1 %a_0, i1 %a_1, i1 %a_
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[A_1:%.*]], [[B_1:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP3]], true
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]]
-; CHECK-NEXT:    [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[D:%.*]] = or i1 [[TMP7]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP2]], [[A_1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = and i1 [[TMP6]], [[B_1]]
+; CHECK-NEXT:    [[D:%.*]] = or i1 [[TMP7]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertvalue { i1, i1, i1, i1, i1 } [[TMP9]], i1 [[TMP4]], 1

diff  --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index d714d624cee7..a7b0fb42f28c 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -448,9 +448,9 @@ define i32 @parity_xor_extra_use(i32 %arg, i32 %arg1) {
 ; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
 ; CHECK-NEXT:    [[I2:%.*]] = and i32 [[I]], 1
 ; CHECK-NEXT:    tail call void @use(i32 [[I2]])
-; CHECK-NEXT:    [[I3:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 1
-; CHECK-NEXT:    [[I5:%.*]] = xor i32 [[I4]], [[I2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT:    [[I5:%.*]] = and i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i32 [[I5]]
 ;
   %i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -467,9 +467,9 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) {
 ; CHECK-NEXT:    [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
 ; CHECK-NEXT:    [[I2:%.*]] = and i32 [[I]], 1
 ; CHECK-NEXT:    tail call void @use(i32 [[I2]])
-; CHECK-NEXT:    [[I3:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
-; CHECK-NEXT:    [[I4:%.*]] = and i32 [[I3]], 1
-; CHECK-NEXT:    [[I5:%.*]] = xor i32 [[I2]], [[I4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[ARG1]], [[ARG:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
+; CHECK-NEXT:    [[I5:%.*]] = and i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i32 [[I5]]
 ;
   %i = tail call i32 @llvm.ctpop.i32(i32 %arg1)

diff  --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 871489fd13e8..dd1471c3b548 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2003,10 +2003,13 @@ define i16 @urem_zext_noundef(i8 noundef %x, i8 %y) {
   ret i16 %z
 }
 
+; x * y - x --> (y - 1) * x
+; TODO: The mul could retain nsw.
+
 define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_commute1(
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = sub nsw i8 [[M]], [[X]]
+; CHECK-NEXT:    [[X1:%.*]] = add i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[X1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %m = mul nsw i8 %x, %y
@@ -2014,10 +2017,12 @@ define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) {
   ret i8 %a
 }
 
+; TODO: The mul could retain nuw.
+
 define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_commute2(
-; CHECK-NEXT:    [[M:%.*]] = mul nuw <2 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = sub nuw <2 x i8> [[M]], [[X]]
+; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[A:%.*]] = mul <2 x i8> [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
   %m = mul nuw <2 x i8> %y, %x
@@ -2025,10 +2030,12 @@ define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %a
 }
 
+; x - (x * y) --> (1 - y) * x
+
 define i8 @mul_sub_common_factor_commute3(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_commute3(
-; CHECK-NEXT:    [[M:%.*]] = mul nuw i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = sub nsw i8 [[X]], [[M]]
+; CHECK-NEXT:    [[M1:%.*]] = sub i8 1, [[Y:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %m = mul nuw i8 %x, %y
@@ -2038,8 +2045,8 @@ define i8 @mul_sub_common_factor_commute3(i8 %x, i8 %y) {
 
 define i8 @mul_sub_common_factor_commute4(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_commute4(
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = sub nuw i8 [[X]], [[M]]
+; CHECK-NEXT:    [[M1:%.*]] = sub i8 1, [[Y:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[A]]
 ;
   %m = mul nsw i8 %y, %x
@@ -2047,6 +2054,8 @@ define i8 @mul_sub_common_factor_commute4(i8 %x, i8 %y) {
   ret i8 %a
 }
 
+; negative test - uses
+
 define i8 @mul_sub_common_factor_use(i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_use(
 ; CHECK-NEXT:    [[M:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]]