[llvm] 0bdab96 - [InstCombine] Contract x^2 + 2*x*y + y^2 to (x + y)^2 (integer)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 9 05:19:37 PDT 2023


Author: Christoph Stiller
Date: 2023-08-09T14:19:21+02:00
New Revision: 0bdab96a5a391161485348ffc7cc32c915a29050

URL: https://github.com/llvm/llvm-project/commit/0bdab96a5a391161485348ffc7cc32c915a29050
DIFF: https://github.com/llvm/llvm-project/commit/0bdab96a5a391161485348ffc7cc32c915a29050.diff

LOG: [InstCombine] Contract x^2 + 2*x*y + y^2 to (x + y)^2 (integer)

There are a number of variations of this pattern, depending on the
order of the operands.

Proof: https://alive2.llvm.org/ce/z/ri5iFw

Differential Revision: https://reviews.llvm.org/D156026
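
To make the fold concrete: given the expanded square-of-sum in IR, InstCombine now emits a single add feeding a mul. A minimal before/after sketch in LLVM IR, distilled from the add_reduce_sqr_sum_nsw test updated below (the function name is illustrative, not part of the patch):

    define i32 @sqr_sum_sketch(i32 %a, i32 %b) {
      ; before: a*a + ((a << 1) + b) * b  ==  a^2 + 2*a*b + b^2
      %a_sq         = mul i32 %a, %a
      %two_a        = shl i32 %a, 1
      %two_a_plus_b = add i32 %two_a, %b
      %mul          = mul i32 %two_a_plus_b, %b
      %add          = add i32 %mul, %a_sq
      ret i32 %add
    }

    ; after this patch, opt -passes=instcombine reduces the body to:
    ;   %1   = add i32 %a, %b
    ;   %add = mul i32 %1, %1
    ;   ret i32 %add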

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/test/Transforms/InstCombine/add.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 5dc778daec2485..ee5329d9bc68d8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -995,6 +995,40 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
   return nullptr;
 }
 
+// Fold variations of a^2 + 2*a*b + b^2 -> (a + b)^2
+Instruction *InstCombinerImpl::foldSquareSumInts(BinaryOperator &I) {
+  Value *A, *B;
+
+  // (a * a) + (((a << 1) + b) * b)
+  bool Matches = match(
+      &I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_Deferred(A))),
+                  m_OneUse(m_Mul(m_c_Add(m_Shl(m_Deferred(A), m_SpecificInt(1)),
+                                         m_Value(B)),
+                                 m_Deferred(B)))));
+
+  // ((a * b) << 1)  or ((a << 1) * b)
+  // +
+  // (a * a + b * b) or (b * b + a * a)
+  if (!Matches) {
+    Matches = match(
+        &I,
+        m_c_Add(m_CombineOr(m_OneUse(m_Shl(m_Mul(m_Value(A), m_Value(B)),
+                                           m_SpecificInt(1))),
+                            m_OneUse(m_Mul(m_Shl(m_Value(A), m_SpecificInt(1)),
+                                           m_Value(B)))),
+                m_OneUse(m_c_Add(m_Mul(m_Deferred(A), m_Deferred(A)),
+                                 m_Mul(m_Deferred(B), m_Deferred(B))))));
+  }
+
+  // if one of them matches: -> (a + b)^2
+  if (Matches) {
+    Value *AB = Builder.CreateAdd(A, B);
+    return BinaryOperator::CreateMul(AB, AB);
+  }
+
+  return nullptr;
+}
+
 // Matches multiplication expression Op * C where C is a constant. Returns the
 // constant value in C and the other operand in Op. Returns true if such a
 // match is found.
@@ -1615,6 +1649,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
         I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                    {Builder.CreateOr(A, B)}));
 
+  if (Instruction *Res = foldSquareSumInts(I))
+    return Res;
+
   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
     return Res;
 

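For reference, the second matcher above (the 2*(a*b) + (a*a + b*b) family) fires on IR of this shape; a sketch distilled from the add_reduce_sqr_sum_order3/order4 tests further down, again with an illustrative function name:

    define i32 @sqr_sum_order3_sketch(i32 %a, i32 %b) {
      ; (a << 1) * b + (a*a + b*b); the (a*b) << 1 spelling is matched as well
      %a_sq   = mul i32 %a, %a
      %two_a  = shl i32 %a, 1
      %two_ab = mul i32 %two_a, %b
      %b_sq   = mul i32 %b, %b
      %a2_b2  = add i32 %a_sq, %b_sq
      %ab2    = add i32 %two_ab, %a2_b2
      ret i32 %ab2
    }

    ; which likewise becomes (add %a, %b) squared.
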
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index e2fecce2aeb1b2..9d50e5b3a4ba44 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -540,6 +540,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
 
   Instruction *foldAddWithConstant(BinaryOperator &Add);
 
+  Instruction *foldSquareSumInts(BinaryOperator &I);
+
   /// Try to rotate an operation below a PHI node, using PHI nodes for
   /// its operands.
   Instruction *foldPHIArgOpIntoPHI(PHINode &PN);

diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 5dada159d543da..7234fb39e566eb 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -3098,11 +3098,8 @@ define i32 @add_zext_sext_i1_different_values(i1 %a, i1 %b) {
 
 define i32 @add_reduce_sqr_sum_nsw(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_nsw(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3115,11 +3112,8 @@ define i32 @add_reduce_sqr_sum_nsw(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_u(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_u(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul i32 %a, %a
@@ -3132,11 +3126,8 @@ define i32 @add_reduce_sqr_sum_u(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_nuw(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_nuw(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nuw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul nuw i32 %a, %a
@@ -3149,11 +3140,8 @@ define i32 @add_reduce_sqr_sum_nuw(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_flipped(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_flipped(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A_SQ]], [[MUL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3166,11 +3154,8 @@ define i32 @add_reduce_sqr_sum_flipped(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_flipped2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_flipped2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3183,11 +3168,8 @@ define i32 @add_reduce_sqr_sum_flipped2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_flipped3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_flipped3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWO_A:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TWO_A_PLUS_B]], [[B]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3200,11 +3182,8 @@ define i32 @add_reduce_sqr_sum_flipped3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3218,11 +3197,8 @@ define i32 @add_reduce_sqr_sum_order2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order2_flipped(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB_B2]], [[A_SQ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3236,11 +3212,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order2_flipped2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3254,11 +3227,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order2_flipped3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB1:%.*]] = add i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB_B2:%.*]] = mul i32 [[TWOAB1]], [[B]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3272,12 +3242,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3291,12 +3257,8 @@ define i32 @add_reduce_sqr_sum_order3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order3_flipped(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3310,12 +3272,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order3_flipped2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3329,12 +3287,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order3_flipped3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order3_flipped3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOA:%.*]] = shl i32 [[A]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOA]], [[B:%.*]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3348,12 +3302,8 @@ define i32 @add_reduce_sqr_sum_order3_flipped3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order4(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[AB:%.*]] = mul i32 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB:%.*]] = shl i32 [[AB]], 1
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3367,12 +3317,8 @@ define i32 @add_reduce_sqr_sum_order4(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order4_flipped(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[AB:%.*]] = mul i32 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB:%.*]] = shl i32 [[AB]], 1
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3386,12 +3332,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order4_flipped2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[AB:%.*]] = mul i32 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB:%.*]] = shl i32 [[AB]], 1
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3405,12 +3347,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order4_flipped3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[AB:%.*]] = mul i32 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TWOAB:%.*]] = shl i32 [[AB]], 1
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3424,12 +3362,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order4_flipped4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order4_flipped4(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[AB:%.*]] = mul i32 [[B:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOAB:%.*]] = shl i32 [[AB]], 1
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3443,12 +3377,8 @@ define i32 @add_reduce_sqr_sum_order4_flipped4(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order5(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order5(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3462,12 +3392,8 @@ define i32 @add_reduce_sqr_sum_order5(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order5_flipped(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[A2_B2]], [[TWOAB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3481,12 +3407,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order5_flipped2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped2(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[B_SQ]], [[A_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3500,12 +3422,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped2(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order5_flipped3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped3(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a
@@ -3519,12 +3437,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped3(i32 %a, i32 %b) {
 
 define i32 @add_reduce_sqr_sum_order5_flipped4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped4(
-; CHECK-NEXT:    [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
-; CHECK-NEXT:    [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
-; CHECK-NEXT:    [[TWOAB:%.*]] = mul i32 [[TWOB]], [[A]]
-; CHECK-NEXT:    [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
-; CHECK-NEXT:    [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
-; CHECK-NEXT:    [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AB2]]
 ;
   %a_sq = mul nsw i32 %a, %a

