[llvm] 53e7443 - [LSR] Don't count conditional loads/stores as enabling pre/post-index (#159573)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 06:53:20 PDT 2025
Author: John Brawn
Date: 2025-10-30T13:53:15Z
New Revision: 53e7443e0c0db82fa82d7b9009bbc5cdac1c9fac
URL: https://github.com/llvm/llvm-project/commit/53e7443e0c0db82fa82d7b9009bbc5cdac1c9fac
DIFF: https://github.com/llvm/llvm-project/commit/53e7443e0c0db82fa82d7b9009bbc5cdac1c9fac.diff
LOG: [LSR] Don't count conditional loads/stores as enabling pre/post-index (#159573)
When a load/store is conditionally executed in a loop, it isn't a
candidate for pre/post-index addressing, as the increment of the address
would only happen on those loop iterations where the load/store is
executed.
Detect this and only discount the AddRec cost when the load/store is
unconditional.
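As an illustration (not part of the patch, and mirroring the conditional_load test updated below), a minimal C++ sketch of the pattern this change is about: the guarded load's address still has to advance on every iteration, so folding the increment into the load as a post-indexed access would skip increments on iterations where the branch isn't taken.

  // Minimal C++ sketch, assuming a loop shaped like the conditional_load
  // test in prefer-all.ll. The load of q[i] only executes on some
  // iterations, but q's address must advance by 4 bytes every iteration,
  // so a post-indexed load (increment fused into the load) would be wrong.
  int conditional_sum(const int *p, const int *q, long n) {
    int ret = 0;
    for (long i = 0; i < n; ++i)
      if (p[i] != 0)
        ret += q[i];
    return ret;
  }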
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6198182..001215abcfb26 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ class LSRUse {
/// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop = true;
+ /// This records whether all of the fixups using this LSRUse are unconditional
+ /// within the loop, meaning they will be executed on every path to the loop
+ /// latch. This includes fixups before early exits.
+ bool AllFixupsUnconditional = true;
+
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
const SCEV *Start;
- const SCEVConstant *Step;
- if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+ const APInt *Step;
+ if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
// If the step size matches the base offset, we could use pre-indexed
// addressing.
- if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
- Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
- ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
- SE->isLoopInvariant(Start, L)))
+ bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+ F.BaseOffset.isFixed() &&
+ *Step == F.BaseOffset.getFixedValue();
+ bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+ !isa<SCEVConstant>(Start) &&
+ SE->isLoopInvariant(Start, L);
+ // We can only pre or post index when the load/store is unconditional.
+ if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
LoopCost = 0;
+ }
}
+
// If the loop counts down to zero and we'll be using a hardware loop then
// the addrec will be combined into the hardware loop instruction.
if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+ if (AllFixupsUnconditional)
+ OS << ", all-fixups-unconditional";
+
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
@@ -2213,6 +2227,7 @@ class LSRInstance {
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+ bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
void CollectLoopInvariantFixupsAndFormulae();
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = TmpPostIncLoops;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
// Create SCEV as Formula for calculating baseline cost
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+ // If the fixup block dominates the IV increment block then there is no path
+ // through the loop to the increment that doesn't pass through the fixup.
+ return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.OperandValToReplace = U;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
// Transfer the fixups of LU to LUThatHas.
for (LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
index 9c36bae6fac13..ec257bcf123f3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
@@ -6,77 +6,81 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: ldr.w r12, [r0]
; CHECK-NEXT: subs.w r9, r1, #1
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: and r8, r9, #3
+; CHECK-NEXT: and r6, r9, #3
; CHECK-NEXT: subs r7, r1, #2
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB0_4
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: cbnz r6, .LBB0_7
+; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: movs r6, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_4: @ %while.body.preheader.new
; CHECK-NEXT: bic r7, r9, #3
-; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: subs r7, #4
+; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: movs r7, #4
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r10, [r0, #16]!
-; CHECK-NEXT: sub.w r9, r9, #4
-; CHECK-NEXT: ldrd r5, r4, [r0, #-12]
-; CHECK-NEXT: ldr r11, [r0, #-4]
+; CHECK-NEXT: ldr r11, [r0, #16]!
+; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: cmp r12, r5
-; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #3
; CHECK-NEXT: csel r5, r5, r12, gt
-; CHECK-NEXT: cmp r5, r4
+; CHECK-NEXT: csinc r6, r10, r8, le
+; CHECK-NEXT: cmp r5, r7
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #2
-; CHECK-NEXT: csel r5, r4, r5, gt
-; CHECK-NEXT: cmp r5, r11
+; CHECK-NEXT: addgt.w r6, r8, #2
+; CHECK-NEXT: csel r7, r7, r5, gt
+; CHECK-NEXT: cmp r7, r4
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #1
-; CHECK-NEXT: csel r5, r11, r5, gt
-; CHECK-NEXT: cmp r5, r10
-; CHECK-NEXT: csel r6, r7, r6, gt
-; CHECK-NEXT: add.w r7, r7, #4
-; CHECK-NEXT: csel r12, r10, r5, gt
+; CHECK-NEXT: addgt.w r6, r8, #3
+; CHECK-NEXT: csel r7, r4, r7, gt
+; CHECK-NEXT: add.w r8, r8, #4
+; CHECK-NEXT: cmp r7, r11
+; CHECK-NEXT: csel r10, r8, r6, gt
+; CHECK-NEXT: csel r12, r11, r7, gt
; CHECK-NEXT: le lr, .LBB0_5
-; CHECK-NEXT: .LBB0_6: @ %while.end.loopexit.unr-lcssa
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: beq .LBB0_10
-; CHECK-NEXT: @ %bb.7: @ %while.body.epil
+; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
+; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT: sub.w r9, r9, r8
+; CHECK-NEXT: cbz r6, .LBB0_10
+; CHECK-NEXT: .LBB0_7: @ %while.body.epil
; CHECK-NEXT: ldr r7, [r0, #4]
; CHECK-NEXT: sub.w r1, r1, r9
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csel r6, r1, r6, gt
+; CHECK-NEXT: csel r10, r1, r10, gt
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #1
+; CHECK-NEXT: cmp r6, #1
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %while.body.epil.1
; CHECK-NEXT: ldr r7, [r0, #8]
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csinc r6, r6, r1, le
+; CHECK-NEXT: csinc r10, r10, r1, le
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #2
+; CHECK-NEXT: cmp r6, #2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.9: @ %while.body.epil.2
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: cmp r12, r0
; CHECK-NEXT: it gt
-; CHECK-NEXT: addgt r6, r1, #2
+; CHECK-NEXT: addgt.w r10, r1, #2
; CHECK-NEXT: csel r12, r0, r12, gt
; CHECK-NEXT: .LBB0_10: @ %while.end
; CHECK-NEXT: str.w r12, [r2]
-; CHECK-NEXT: str r6, [r3]
+; CHECK-NEXT: str.w r10, [r3]
+; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%0 = load i32, ptr %pSrc, align 4
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
index db30fd23b0c9d..1944a9c800355 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
@@ -119,8 +119,6 @@ for.end:
; We can't use postindex addressing on the conditional load of qval and can't
; convert the loop condition to a compare with zero, so we should instead use
; offset addressing.
-; FIXME: Currently we don't notice the load of qval is conditional, and attempt
-; postindex addressing anyway.
define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-LABEL: define i32 @conditional_load(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
@@ -128,7 +126,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
@@ -136,6 +133,8 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IDX]], 2
+; CHECK-NEXT: [[LSR_IV:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP0]]
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RET]], [[QVAL]]
; CHECK-NEXT: br label %[[FOR_INC]]
@@ -143,7 +142,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[RET_NEXT]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[RET]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
@@ -176,3 +174,141 @@ for.inc:
exit:
ret i32 %ret.next
}
+
+; We can use postindex addressing for both loads here, even though the second
+; may not be executed on every loop iteration.
+define i32 @early_exit_load(ptr %p, ptr %q, ptr %n) {
+; CHECK-LABEL: define i32 @early_exit_load(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
+; CHECK-NEXT: [[RET_PHI:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[PVAL]], 0
+; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[EXIT:.*]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[QVAL]], [[RET_PHI]]
+; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
+; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_PHI]], %[[FOR_BODY]] ], [ [[ADD]], %[[FOR_INC]] ]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %ret.phi = phi i32 [ %add, %for.inc ], [ 0, %entry ]
+ %idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
+ %paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ %pval = load i32, ptr %paddr, align 4
+ %cmp1 = icmp eq i32 %pval, 0
+ br i1 %cmp1, label %for.inc, label %exit
+
+for.inc:
+ %qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
+ %qval = load i32, ptr %qaddr, align 4
+ %add = add nsw i32 %qval, %ret.phi
+ %idx.next = add nuw nsw i64 %idx, 1
+ %nval = load volatile i64, ptr %n, align 8
+ %cmp2 = icmp slt i64 %idx.next, %nval
+ br i1 %cmp2, label %for.body, label %exit
+
+exit:
+ %ret = phi i32 [ %ret.phi, %for.body ], [ %add, %for.inc ]
+ ret i32 %ret
+}
+
+; The control-flow before and after the load of qval shouldn't prevent postindex
+; addressing from happening.
+; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so
+; during codegen we will fail to actually generate a postindex load.
+define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
+; CHECK-LABEL: define void @middle_block_load(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC]] ], [ [[N]], %[[ENTRY]] ]
+; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV2]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[PVAL]], 0
+; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN1:.*]], label %[[IF_ELSE1:.*]]
+; CHECK: [[IF_THEN1]]:
+; CHECK-NEXT: tail call void @otherfn1()
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_ELSE1]]:
+; CHECK-NEXT: tail call void @otherfn2()
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]]
+; CHECK: [[IF_THEN2]]:
+; CHECK-NEXT: tail call void @otherfn1()
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[IF_ELSE2]]:
+; CHECK-NEXT: tail call void @otherfn2()
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
+ %paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
+ %pval = load i32, ptr %paddr, align 4
+ %cmp1 = icmp sgt i32 %pval, 0
+ br i1 %cmp1, label %if.then1, label %if.else1
+
+if.then1:
+ tail call void @otherfn1()
+ br label %if.end
+
+if.else1:
+ tail call void @otherfn2()
+ br label %if.end
+
+if.end:
+ %qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
+ %qval = load i32, ptr %qaddr, align 4
+ %cmp2 = icmp sgt i32 %qval, 0
+ br i1 %cmp2, label %if.then2, label %if.else2
+
+if.then2:
+ tail call void @otherfn1()
+ br label %for.inc
+
+if.else2:
+ tail call void @otherfn2()
+ br label %for.inc
+
+for.inc:
+ %idx.next = add nuw nsw i64 %idx, 1
+ %cmp3 = icmp eq i64 %idx.next, %n
+ br i1 %cmp3, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+declare dso_local void @otherfn1()
+declare dso_local void @otherfn2()