[llvm] 5ebd598 - [VPlan] Fold BinaryAnd x, 0 -> 0 in simplifyRecipe.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 14:35:22 PDT 2025
Author: Florian Hahn
Date: 2025-08-27T22:35:08+01:00
New Revision: 5ebd59806b09f9e3d1a0770cd74a06d99ef23a06
URL: https://github.com/llvm/llvm-project/commit/5ebd59806b09f9e3d1a0770cd74a06d99ef23a06
DIFF: https://github.com/llvm/llvm-project/commit/5ebd59806b09f9e3d1a0770cd74a06d99ef23a06.diff
LOG: [VPlan] Fold BinaryAnd x, 0 -> 0 in simplifyRecipe.
This also fixes a cost-model divergence in the newly added
tests in constant-fold.ll.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b33359c9bb0d6..18c2bef90f08f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -145,6 +145,16 @@ inline int_pred_ty<is_all_ones> m_AllOnes() {
return int_pred_ty<is_all_ones>();
}
+struct is_zero_int {
+ bool isValue(const APInt &C) const { return C.isZero(); }
+};
+
+/// Match an integer 0 or a vector with all elements equal to 0.
+/// For vectors, this includes constants with undefined elements.
+inline int_pred_ty<is_zero_int> m_ZeroInt() {
+ return int_pred_ty<is_zero_int>();
+}
+
/// Matching combinators
template <typename LTy, typename RTy> struct match_combine_or {
LTy L;
@@ -397,6 +407,13 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
+/// Match a binary AND operation.
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t>
+m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_c_Binary<Instruction::And, Op0_t, Op1_t>(Op0, Op1);
+}
+
/// Match a binary OR operation. Note that while conceptually the operands can
/// be matched commutatively, \p Commutative defaults to false in line with the
/// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d32d2a9ad11f7..6c5f9b7302292 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1084,6 +1084,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ // AND x, 0 -> 0
+ if (match(&R, m_c_BinaryAnd(m_VPValue(X), m_ZeroInt()))) {
+ Def->replaceAllUsesWith(R.getOperand(0) == X ? R.getOperand(1)
+ : R.getOperand(0));
+ return;
+ }
+
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return Def->replaceAllUsesWith(X);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index da1a5aa3a9f04..db54ca61f715b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -243,3 +243,188 @@ loop.latch:
exit:
ret void
}
+
+define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_and_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
+; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK: pred.store.if7:
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK: then.1:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false
+; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK: then.2:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+ %cmp = icmp eq i32 %iv, 2
+ %or = or i1 %cmp, false
+ %cond = select i1 %or, i1 %c.1, i1 false
+ br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 0, ptr %gep, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 3
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_and_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if1:
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue2:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK: then.1:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT: [[OR:%.*]] = and i1 false, [[CMP]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false
+; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK: then.2:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+ %cmp = icmp eq i32 %iv, 2
+ %or = and i1 false, %cmp
+ %cond = select i1 %or, i1 %c.1, i1 false
+ br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 0, ptr %gep, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 3
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index aecfc668cf293..7fa911feb8db5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -77,14 +77,12 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[STEP_ADD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[VEC_IND2]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[STEP_ADD4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> [[TMP2]], <2 x i64> [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP3]], <2 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> zeroinitializer, <2 x i64> [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> [[TMP7]]
; CHECK-NEXT: [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
index 9e94768fc2cbc..089fc99ff8ba5 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
@@ -129,16 +129,8 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i16> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 2
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP9]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
More information about the llvm-commits mailing list