[llvm] 5ebd598 - [VPlan] Fold BinaryAnd x, 0 -> 0 in simplifyRecipe.

Wed Aug 27 14:35:22 PDT 2025

Author: Florian Hahn
Date: 2025-08-27T22:35:08+01:00
New Revision: 5ebd59806b09f9e3d1a0770cd74a06d99ef23a06

URL: https://github.com/llvm/llvm-project/commit/5ebd59806b09f9e3d1a0770cd74a06d99ef23a06
DIFF: https://github.com/llvm/llvm-project/commit/5ebd59806b09f9e3d1a0770cd74a06d99ef23a06.diff

LOG: [VPlan] Fold BinaryAnd x, 0 -> 0 in simplifyRecipe.

This also fixes a cost-model divergence in the newly added
tests in constant-fold.ll

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
    llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
    llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b33359c9bb0d6..18c2bef90f08f 100644

--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -145,6 +145,16 @@ inline int_pred_ty<is_all_ones> m_AllOnes() {
   return int_pred_ty<is_all_ones>();
 }
 
+struct is_zero_int {
+  bool isValue(const APInt &C) const { return C.isZero(); }
+};
+
+/// Match an integer 0 or a vector with all elements equal to 0.
+/// For vectors, this includes constants with undefined elements.
+inline int_pred_ty<is_zero_int> m_ZeroInt() {
+  return int_pred_ty<is_zero_int>();
+}
+
 /// Matching combinators
 template <typename LTy, typename RTy> struct match_combine_or {
   LTy L;
@@ -397,6 +407,13 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
   return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
 }
 
+/// Match a binary AND operation.
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t>
+m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
+  return m_c_Binary<Instruction::And, Op0_t, Op1_t>(Op0, Op1);
+}
+
 /// Match a binary OR operation. Note that while conceptually the operands can
 /// be matched commutatively, \p Commutative defaults to false in line with the
 /// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d32d2a9ad11f7..6c5f9b7302292 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1084,6 +1084,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
+  // AND x, 0 -> 0
+  if (match(&R, m_c_BinaryAnd(m_VPValue(X), m_ZeroInt()))) {
+    Def->replaceAllUsesWith(R.getOperand(0) == X ? R.getOperand(1)
+                                                 : R.getOperand(0));
+    return;
+  }
+
   if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
     return Def->replaceAllUsesWith(X);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index da1a5aa3a9f04..db54ca61f715b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -243,3 +243,188 @@ loop.latch:
 exit:
   ret void
 }
+
+define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_and_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
+; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT:    store i32 0, ptr [[TMP12]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
+; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT:    store i32 0, ptr [[TMP15]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
+; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT:    store i32 0, ptr [[TMP18]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK:       then.1:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], false
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false
+; CHECK-NEXT:    br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK:       then.2:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT:    store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+  %cmp = icmp eq i32 %iv, 2
+  %or = or i1 %cmp, false
+  %cond = select i1 %or, i1 %c.1, i1 false
+  br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+  %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+  store i32 0, ptr %gep, align 4
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 3
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_and_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT:    store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if1:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue2:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT:    store i32 0, ptr [[TMP13]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT:    store i32 0, ptr [[TMP16]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK:       then.1:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT:    [[OR:%.*]] = and i1 false, [[CMP]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false
+; CHECK-NEXT:    br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK:       then.2:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT:    store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+  %cmp = icmp eq i32 %iv, 2
+  %or = and i1 false, %cmp
+  %cond = select i1 %or, i1 %c.1, i1 false
+  br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+  %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+  store i32 0, ptr %gep, align 4
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, 3
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index aecfc668cf293..7fa911feb8db5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -77,14 +77,12 @@ define i64 @second_lshr_operand_zero_via_scev() {
 ; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[STEP_ADD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[VEC_IND2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = lshr <2 x i32> [[STEP_ADD4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64>
-; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> [[TMP2]], <2 x i64> [[TMP6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP3]], <2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> zeroinitializer, <2 x i64> [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> [[TMP7]]
 ; CHECK-NEXT:    [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

diff  --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
index 9e94768fc2cbc..089fc99ff8ba5 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
@@ -129,16 +129,8 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i16> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr [[TMP0]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]