[llvm] [InstCombine] Simplify phi using KnownBits of condition (PR #134712)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 7 12:38:53 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Andreas Jonson (andjo403)
Changes:
Simplify the arms of a phi based on the KnownBits implied by the condition for the predecessor basic block.
I noticed a regression of this form (see the proof below) while working on another PR. I do not know whether it is possible to make the proof generic.
proof: https://alive2.llvm.org/ce/z/Xh7CQb
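To illustrate the fold, here is a minimal hand-written sketch in the spirit of the `limit_i64_eq_7` test in `known-phi-br.ll` below (block and value names are mine, not copied verbatim from the test; the comments are my reading of the change):

```llvm
define i64 @limit_i64_eq_7(i64 %x) {
entry:
  %cmp = icmp eq i64 %x, 7
  br i1 %cmp, label %end, label %body

body:
  br label %end

end:
  ; The entry -> end edge is only taken when %x == 7, so the incoming
  ; value %x is a known constant and both phi arms become 7.
  %res = phi i64 [ %x, %entry ], [ 7, %body ]
  ret i64 %res              ; folds to `ret i64 7` with this patch
}
```

Because %end has two predecessors, the entry -> end edge does not dominate %end, so the existing edge-dominance check in computeKnownBitsFromContext could not apply the condition at the phi; the successor-based check for phi contexts added in this patch handles it.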
---
Patch is 173.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134712.diff
32 Files Affected:
- (modified) llvm/lib/Analysis/ValueTracking.cpp (+19-8)
- (modified) llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp (+19)
- (modified) llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll (+1-1)
- (modified) llvm/test/Transforms/InstCombine/known-phi-br.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll (+2-13)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll (+10-19)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+20-35)
- (modified) llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/if-conversion.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+54-92)
- (modified) llvm/test/Transforms/LoopVectorize/phi-cost.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll (+34-43)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (+16-20)
- (modified) llvm/test/Transforms/LoopVectorize/reduction.ll (+10-20)
- (modified) llvm/test/Transforms/LoopVectorize/runtime-check.ll (+5-8)
- (modified) llvm/test/Transforms/LoopVectorize/uniform-args-call-variants.ll (+4-6)
- (modified) llvm/test/Transforms/PGOProfile/chr.ll (+1-1)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll (+4-6)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll (+4-8)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll (+6-8)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll (+1-2)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll (+2-4)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll (+1-2)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll (+1-2)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll (+1-2)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll (+35-36)
``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3b0249f91d6d7..3bcfce75b931f 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -840,16 +840,27 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
 
   if (Q.DC && Q.DT) {
     // Handle dominating conditions.
-    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
-      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
-      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
-        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
-                                 /*Invert*/ false);
+    const BasicBlock *CxtIBB = Q.CxtI->getParent();
+    if (isa<PHINode>(Q.CxtI))
+      for (BranchInst *BI : Q.DC->conditionsFor(V)) {
+        if (BI->getSuccessor(0) != CxtIBB && BI->getSuccessor(1) != CxtIBB)
+          continue;
 
-      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
-      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
+        bool Invert = BI->getSuccessor(1) == CxtIBB;
         computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
-                                 /*Invert*/ true);
-    }
+                                 Invert);
+      }
+    else
+      for (BranchInst *BI : Q.DC->conditionsFor(V)) {
+        BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
+        if (Q.DT->dominates(Edge0, CxtIBB))
+          computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
+                                   /*Invert*/ false);
+
+        BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
+        if (Q.DT->dominates(Edge1, CxtIBB))
+          computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
+                                   /*Invert*/ true);
+      }
 
     if (Known.hasConflict())
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 80308bf92dbbc..c3a6ff691a08e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1625,6 +1625,25 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     return replaceInstUsesWith(PN, &IdenticalPN);
   }
 
+  if (PN.getType()->isIntegerTy()) {
+    bool MadeChange = false;
+    SimplifyQuery Q = SQ.getWithInstruction(&PN);
+    for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+      Value *V = PN.getIncomingValue(I);
+      if (isa<ConstantInt>(V))
+        continue;
+
+      KnownBits Known = llvm::computeKnownBits(V, /* Depth */ 0, Q);
+      if (Known.isConstant()) {
+        replaceOperand(PN, I,
+                       ConstantInt::get(V->getType(), Known.getConstant()));
+        MadeChange = true;
+      }
+    }
+    if (MadeChange)
+      return &PN;
+  }
+
   // If this is an integer PHI and we know that it has an illegal type, see if
   // it is only used by trunc or trunc(lshr) operations. If so, we split the
   // PHI into the various pieces being extracted. This sort of thing is
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index eb5f96d8f942d..ed1247d9aec5b 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -178,7 +178,7 @@ define {ptr, i64} @test4(i1 %cond1, i1 %cond2, ptr %p1, ptr %p2) {
; CHECK-NEXT: br i1 [[COND3_NOT]], label %[[EXIT]], label %[[BBB4]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[VAL1:%.*]] = phi ptr [ [[VAL11]], %[[BBB2]] ], [ [[VAL21]], %[[BBB3]] ], [ [[VAL31]], %[[BBB4]] ]
-; CHECK-NEXT: [[VAL2:%.*]] = phi i64 [ [[VAL12]], %[[BBB2]] ], [ [[VAL22]], %[[BBB3]] ], [ [[VAL32]], %[[BBB4]] ]
+; CHECK-NEXT: [[VAL2:%.*]] = phi i64 [ [[VAL12]], %[[BBB2]] ], [ [[VAL22]], %[[BBB3]] ], [ 0, %[[BBB4]] ]
; CHECK-NEXT: [[TMP:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL1]], 0
; CHECK-NEXT: [[RES:%.*]] = insertvalue { ptr, i64 } [[TMP]], i64 [[VAL2]], 1
; CHECK-NEXT: ret { ptr, i64 } [[RES]]
diff --git a/llvm/test/Transforms/InstCombine/known-phi-br.ll b/llvm/test/Transforms/InstCombine/known-phi-br.ll
index 1ad0ed42d8d34..64d3344eb2066 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-br.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-br.ll
@@ -15,8 +15,7 @@ define i64 @limit_i64_eq_7(i64 %x) {
; CHECK: body:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 7, [[BODY]] ]
-; CHECK-NEXT: ret i64 [[RES]]
+; CHECK-NEXT: ret i64 7
;
entry:
%cmp = icmp eq i64 %x, 7
@@ -38,8 +37,7 @@ define i64 @limit_i64_ne_255(i64 %x) {
; CHECK: body:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 255, [[BODY]] ]
-; CHECK-NEXT: ret i64 [[RES]]
+; CHECK-NEXT: ret i64 255
;
entry:
%cmp = icmp ne i64 %x, 255
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index c2502aac5b61d..ad1292997802d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -54,11 +54,10 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967292
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX6]]
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]]
@@ -83,14 +82,13 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP27]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
index 1921e5f193aa3..f441eadc6ae0e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
@@ -7,10 +7,6 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; CHECK-COST-LABEL: sadd
-; CHECK-COST: Found an estimated cost of 6 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
-; CHECK-COST: Cost of 4 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>)
-; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>)
-; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>)
define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
; CHECK-LABEL: @saddsat(
@@ -60,7 +56,6 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC6:%.*]] = and i64 [[TMP0]], 4294967292
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC6]] to i32
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
@@ -72,7 +67,7 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT21]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX15:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX15:%.*]] = phi i64 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX16:%.*]] = shl i64 [[INDEX15]], 1
; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[OFFSET_IDX16]]
; CHECK-NEXT: [[OFFSET_IDX18:%.*]] = shl i64 [[INDEX15]], 1
@@ -128,11 +123,6 @@ while.end: ; preds = %while.body, %entry
}
; CHECK-COST-LABEL: umin
-; CHECK-COST: Found an estimated cost of 2 for VF 1 For instruction: %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset)
-; CHECK-COST: Cost of 1 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>)
-; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>)
-; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>)
-; CHECK-COST: Cost of 1 for VF 16: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>)
define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
@@ -179,7 +169,6 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967288
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC5]] to i32
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
@@ -189,7 +178,7 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT18]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX14]]
; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX14]]
; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i8>, ptr [[NEXT_GEP15]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
index c459dd28fdaee..fe31987f29364 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -70,11 +70,10 @@ define i64 @add_i32_i64(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -135,11 +134,10 @@ define i64 @add_i16_i64(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -200,11 +198,10 @@ define i64 @add_i8_i64(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -585,11 +582,10 @@ define i64 @mla_i32_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[I_010]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -661,11 +657,10 @@ define i64 @mla_i16_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_012]]
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -741,11 +736,10 @@ define i64 @mla_i8_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 [[I_012]]
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -1200,11 +1194,10 @@ define i64 @red_mla_ext_s16_u16_s64(ptr noalias nocapture readonly %A, ptr noali
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
; CHECK-NEXT: [[S_010:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[I_011]]
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 1
@@ -1473,14 +1466,13 @@ define i64 @mla_xx_sext_zext(ptr nocapture noundef readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/134712