[llvm] 15e3d86 - [InstCombine] reassociate bitwise logic chains based on uses

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 21 06:44:22 PDT 2022


Author: Sanjay Patel
Date: 2022-08-21T09:42:14-04:00
New Revision: 15e3d869119289991072679f82622072818468e6

URL: https://github.com/llvm/llvm-project/commit/15e3d869119289991072679f82622072818468e6
DIFF: https://github.com/llvm/llvm-project/commit/15e3d869119289991072679f82622072818468e6.diff

LOG: [InstCombine] reassociate bitwise logic chains based on uses

(X op Y) op Z --> (Y op Z) op X

This isn't a complete solution (see TODO tests for possible refinements),
but it shows some nice wins and doesn't seem to cause any harm. I think
the greatest potential danger is conflicting with other folds and causing
an infinite loop - that's the reason for avoiding patterns with constant
operands.

Alternatively, we could try this in the reassociate pass, but we would not
immediately see all of the logic folds that instcombine provides. I also
looked at improving ValueTracking's isImpliedCondition() (and we should
still add some enhancements there), but that would not work in general for
bitwise logic reduction.

The tests that reduce completely to 0/-1 are motivated by issue #56653.

Differential Revision: https://reviews.llvm.org/D131356

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and-or-icmps.ll
    llvm/test/Transforms/InstCombine/and-or-not.ll
    llvm/test/Transforms/InstCombine/and-or.ll
    llvm/test/Transforms/InstCombine/and-xor-or.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
    llvm/test/Transforms/LoopVectorize/reduction-predselect.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f037305ecf1d5..59217d9db3d48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1744,6 +1744,29 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
   return nullptr;
 }
 
+/// Try to reassociate a pair of same-opcode binops so that one-use values end
+/// up in the same instruction: (X op Y) op Z --> (Y op Z) op X. This may
+/// enable folds that are limited by multi-use restrictions and makes it more
+/// likely to match other patterns that look for a common operand.
+static Instruction *reassociateForUses(BinaryOperator &BO,
+                                       InstCombinerImpl::BuilderTy &Builder) {
+  Instruction::BinaryOps Opcode = BO.getOpcode();
+  Value *X, *Y, *Z;
+  if (match(&BO, m_c_BinOp(Opcode,
+                           m_OneUse(m_c_BinOp(Opcode, m_Value(X),
+                                              m_OneUse(m_Value(Y)))),
+                           m_OneUse(m_Value(Z))))) {
+    // Skip constants (may fight other folds); require that X is not one-use.
+    if (!isa<Constant>(X) && !isa<Constant>(Y) && !isa<Constant>(Z) &&
+        !X->hasOneUse()) {
+      Value *YZ = Builder.CreateBinOp(Opcode, Y, Z);
+      return BinaryOperator::Create(Opcode, YZ, X);
+    }
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2204,6 +2227,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
     return replaceInstUsesWith(I, Builder.CreateAnd(Start, Step));
 
+  if (Instruction *R = reassociateForUses(I, Builder))
+    return R;
+
   return nullptr;
 }
 
@@ -3162,6 +3188,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
           Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
   }
 
+  if (Instruction *R = reassociateForUses(I, Builder))
+    return R;
+
   return nullptr;
 }
 
@@ -3899,5 +3928,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
                         m_Value(Y))))
     return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);
 
+  if (Instruction *R = reassociateForUses(I, Builder))
+    return R;
+
   return nullptr;
 }

diff  --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index 8bfd39b1708ef..3dc35b82a7eb2 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -373,8 +373,8 @@ define void @simplify_before_foldAndOfICmps() {
 ; CHECK-NEXT:    [[C7:%.*]] = icmp slt i16 [[L7]], 0
 ; CHECK-NEXT:    [[B15:%.*]] = xor i1 [[C7]], [[C10]]
 ; CHECK-NEXT:    [[C6:%.*]] = xor i1 [[B15]], true
-; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[C10]], [[C5]]
-; CHECK-NEXT:    [[C3:%.*]] = and i1 [[TMP2]], [[C6]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[C5]], [[C6]]
+; CHECK-NEXT:    [[C3:%.*]] = and i1 [[TMP2]], [[C10]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[C10]], true
 ; CHECK-NEXT:    [[C18:%.*]] = or i1 [[C7]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext i1 [[C3]] to i64

diff  --git a/llvm/test/Transforms/InstCombine/and-or-not.ll b/llvm/test/Transforms/InstCombine/and-or-not.ll
index 4ee3bd81ddf8f..73cb2f29f5dfc 100644
--- a/llvm/test/Transforms/InstCombine/and-or-not.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-not.ll
@@ -645,11 +645,7 @@ define i32 @xor_to_xnor4(float %fa, float %fb) {
 
 define i4 @simplify_or_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_or_common_op_commute0(
-; CHECK-NEXT:    [[XY:%.*]] = and i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = and i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1
-; CHECK-NEXT:    [[R:%.*]] = or i4 [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    ret i4 -1
 ;
   %xy = and i4 %x, %y
   %xyz = and i4 %xy, %z
@@ -660,11 +656,7 @@ define i4 @simplify_or_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 
 define i4 @simplify_or_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_or_common_op_commute1(
-; CHECK-NEXT:    [[XY:%.*]] = and i4 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = and i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1
-; CHECK-NEXT:    [[R:%.*]] = or i4 [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    ret i4 -1
 ;
   %xy = and i4 %y, %x
   %xyz = and i4 %xy, %z
@@ -673,15 +665,11 @@ define i4 @simplify_or_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
   ret i4 %r
 }
 
+; The common operand may bubble through multiple instructions.
+
 define i4 @simplify_or_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 ; CHECK-LABEL: @simplify_or_common_op_commute2(
-; CHECK-NEXT:    [[Z:%.*]] = mul i4 [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = and i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = and i4 [[Z]], [[XY]]
-; CHECK-NEXT:    [[XYZQ:%.*]] = and i4 [[XYZ]], [[Q:%.*]]
-; CHECK-NEXT:    [[NOT_XYZQ:%.*]] = xor i4 [[XYZQ]], -1
-; CHECK-NEXT:    [[R:%.*]] = or i4 [[NOT_XYZQ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    ret i4 -1
 ;
   %z = mul i4 %p, %p ; thwart complexity-based canonicalization
   %xy = and i4 %x, %y
@@ -694,12 +682,7 @@ define i4 @simplify_or_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 
 define <2 x i4> @simplify_or_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p)  {
 ; CHECK-LABEL: @simplify_or_common_op_commute3(
-; CHECK-NEXT:    [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = and <2 x i4> [[Z]], [[XY]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor <2 x i4> [[XYZ]], <i4 -1, i4 -1>
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i4> [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret <2 x i4> [[R]]
+; CHECK-NEXT:    ret <2 x i4> <i4 -1, i4 -1>
 ;
   %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization
   %xy = and <2 x i4> %y, %x
@@ -711,12 +694,8 @@ define <2 x i4> @simplify_or_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i
 
 define i4 @simplify_and_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_and_common_op_commute0(
-; CHECK-NEXT:    [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    call void @use(i4 [[X]])
-; CHECK-NEXT:    [[XYZ:%.*]] = or i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1
-; CHECK-NEXT:    [[R:%.*]] = and i4 [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    call void @use(i4 [[X:%.*]])
+; CHECK-NEXT:    ret i4 0
 ;
   %xy = or i4 %x, %y
   call void @use(i4 %x)
@@ -728,11 +707,7 @@ define i4 @simplify_and_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 
 define i4 @simplify_and_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_and_common_op_commute1(
-; CHECK-NEXT:    [[XY:%.*]] = or i4 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = or i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor i4 [[XYZ]], -1
-; CHECK-NEXT:    [[R:%.*]] = and i4 [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    ret i4 0
 ;
   %xy = or i4 %y, %x
   %xyz = or i4 %xy, %z
@@ -741,15 +716,11 @@ define i4 @simplify_and_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
   ret i4 %r
 }
 
+; The common operand may bubble through multiple instructions.
+
 define i4 @simplify_and_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 ; CHECK-LABEL: @simplify_and_common_op_commute2(
-; CHECK-NEXT:    [[Z:%.*]] = mul i4 [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = or i4 [[Z]], [[XY]]
-; CHECK-NEXT:    [[XYZQ:%.*]] = or i4 [[XYZ]], [[Q:%.*]]
-; CHECK-NEXT:    [[NOT_XYZQ:%.*]] = xor i4 [[XYZQ]], -1
-; CHECK-NEXT:    [[R:%.*]] = and i4 [[NOT_XYZQ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    ret i4 0
 ;
   %z = mul i4 %p, %p ; thwart complexity-based canonicalization
   %xy = or i4 %x, %y
@@ -762,12 +733,7 @@ define i4 @simplify_and_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 
 define <2 x i4> @simplify_and_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p)  {
 ; CHECK-LABEL: @simplify_and_common_op_commute3(
-; CHECK-NEXT:    [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = or <2 x i4> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = or <2 x i4> [[Z]], [[XY]]
-; CHECK-NEXT:    [[NOT_XYZ:%.*]] = xor <2 x i4> [[XYZ]], <i4 -1, i4 -1>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i4> [[NOT_XYZ]], [[X]]
-; CHECK-NEXT:    ret <2 x i4> [[R]]
+; CHECK-NEXT:    ret <2 x i4> zeroinitializer
 ;
   %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization
   %xy = or <2 x i4> %y, %x
@@ -777,6 +743,8 @@ define <2 x i4> @simplify_and_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x
   ret <2 x i4> %r
 }
 
+; TODO: This should simplify.
+
 define i4 @simplify_and_common_op_use1(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_and_common_op_use1(
 ; CHECK-NEXT:    [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]]
@@ -794,6 +762,8 @@ define i4 @simplify_and_common_op_use1(i4 %x, i4 %y, i4 %z)  {
   ret i4 %r
 }
 
+; TODO: This should simplify.
+
 define i4 @simplify_and_common_op_use2(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @simplify_and_common_op_use2(
 ; CHECK-NEXT:    [[XY:%.*]] = or i4 [[X:%.*]], [[Y:%.*]]
@@ -813,9 +783,8 @@ define i4 @simplify_and_common_op_use2(i4 %x, i4 %y, i4 %z)  {
 
 define i4 @reduce_xor_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @reduce_xor_common_op_commute0(
-; CHECK-NEXT:    [[XY:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = xor i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i4 [[XYZ]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i4 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i4 [[R]]
 ;
   %xy = xor i4 %x, %y
@@ -826,9 +795,8 @@ define i4 @reduce_xor_common_op_commute0(i4 %x, i4 %y, i4 %z)  {
 
 define i4 @reduce_xor_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
 ; CHECK-LABEL: @reduce_xor_common_op_commute1(
-; CHECK-NEXT:    [[XY:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = xor i4 [[XY]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i4 [[XYZ]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i4 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i4 [[R]]
 ;
   %xy = xor i4 %y, %x
@@ -840,11 +808,9 @@ define i4 @reduce_xor_common_op_commute1(i4 %x, i4 %y, i4 %z)  {
 define i4 @annihilate_xor_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 ; CHECK-LABEL: @annihilate_xor_common_op_commute2(
 ; CHECK-NEXT:    [[Z:%.*]] = mul i4 [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = xor i4 [[Z]], [[XY]]
-; CHECK-NEXT:    [[XYZQ:%.*]] = xor i4 [[XYZ]], [[Q:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor i4 [[XYZQ]], [[X]]
-; CHECK-NEXT:    ret i4 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i4 [[Z]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i4 [[TMP1]], [[Q:%.*]]
+; CHECK-NEXT:    ret i4 [[TMP2]]
 ;
   %z = mul i4 %p, %p ; thwart complexity-based canonicalization
   %xy = xor i4 %x, %y
@@ -857,9 +823,8 @@ define i4 @annihilate_xor_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q)  {
 define <2 x i4> @reduce_xor_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p)  {
 ; CHECK-LABEL: @reduce_xor_common_op_commute3(
 ; CHECK-NEXT:    [[Z:%.*]] = mul <2 x i4> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[XY:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = xor <2 x i4> [[Z]], [[XY]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i4> [[XYZ]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i4> [[Z]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or <2 x i4> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i4> [[R]]
 ;
   %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization

diff  --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
index cd2e3c2f57a49..2b1d4539afa19 100644
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -672,10 +672,10 @@ define i32 @or_or_and_noOneUse_fail1(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHR]], 157
 ; CHECK-NEXT:    call void @use2(i32 [[AND]])
 ; CHECK-NEXT:    [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND3]], [[AND]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[B]], 23
 ; CHECK-NEXT:    [[AND9:%.*]] = and i32 [[TMP1]], 157
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[OR]], [[AND9]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[AND3]], [[AND9]]
+; CHECK-NEXT:    [[R:%.*]] = or i32 [[TMP2]], [[AND]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shr = ashr i32 %a, 23
@@ -701,8 +701,8 @@ define { i1, i1, i1, i1, i1 } @or_or_and_noOneUse_fail2(i1 %a_0, i1 %a_1, i1 %a_
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP3]], true
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    [[D:%.*]] = or i1 [[TMP7]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[D:%.*]] = or i1 [[TMP7]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertvalue { i1, i1, i1, i1, i1 } [[TMP9]], i1 [[TMP4]], 1

diff  --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
index 6a70fd9cd4b9e..edfc62ab2b05d 100644
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -3637,8 +3637,8 @@ define i32 @not_and_and_or_no_or_use5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[B]], [[A]]
 ; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[OR1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[TMP1]], [[B]]
 ; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]]
 ; CHECK-NEXT:    call void @use(i32 [[OR1]])
 ; CHECK-NEXT:    ret i32 [[OR2]]
@@ -3659,8 +3659,8 @@ define i32 @not_and_and_or_no_or_use6(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[B]], [[A]]
 ; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[OR1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[TMP1]], [[B]]
 ; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]]
 ; CHECK-NEXT:    call void @use(i32 [[NOT1]])
 ; CHECK-NEXT:    ret i32 [[OR2]]
@@ -3702,8 +3702,8 @@ define i32 @not_and_and_or_no_or_use8(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[B]], [[A]]
 ; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[OR1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[TMP1]], [[B]]
 ; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]]
 ; CHECK-NEXT:    call void @use(i32 [[AND2]])
 ; CHECK-NEXT:    ret i32 [[OR2]]
@@ -3916,11 +3916,10 @@ define i32 @not_or_or_and_no_and_use5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use5
 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[B]], [[A]]
-; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[AND1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[OR1]], [[C]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[TMP1]], [[B]]
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]]
 ; CHECK-NEXT:    call void @use(i32 [[AND1]])
 ; CHECK-NEXT:    ret i32 [[AND2]]
 ;
@@ -3940,9 +3939,9 @@ define i32 @not_or_or_and_no_and_use6(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[B]], [[A]]
 ; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[AND1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[OR1]], [[C]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[TMP1]], [[B]]
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]]
 ; CHECK-NEXT:    call void @use(i32 [[NOT1]])
 ; CHECK-NEXT:    ret i32 [[AND2]]
 ;
@@ -3981,11 +3980,10 @@ define i32 @not_or_or_and_no_and_use8(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: define {{[^@]+}}@not_or_or_and_no_and_use8
 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[B]], [[A]]
-; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[AND1]], -1
 ; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOT2]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[OR1]], [[C]]
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[OR2]], [[NOT1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[NOT2]], [[C]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[TMP1]], [[B]]
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[AND1]], [[OR2]]
 ; CHECK-NEXT:    call void @use(i32 [[OR2]])
 ; CHECK-NEXT:    ret i32 [[AND2]]
 ;

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
index 4763c1d28eeb5..de8e5533b616a 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
@@ -180,9 +180,9 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD1]]
-; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP6]] = and <4 x i32> [[VEC_PHI]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index c9a1e642dd3f1..539abf74972b4 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -366,9 +366,9 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP38]]
-; CHECK-NEXT:    [[TMP41:%.*]] = and <4 x i32> [[TMP40]], [[TMP39]]
-; CHECK-NEXT:    [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260


        


More information about the llvm-commits mailing list