[llvm] e44f4a8 - [LoopFusion] Move instructions from FC1.GuardBlock to FC0.GuardBlock and
Whitney Tsang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 10:02:38 PST 2020
Author: Whitney Tsang
Date: 2020-01-30T18:02:22Z
New Revision: e44f4a8a54141d5f527ed8ee05362cc98031d723
URL: https://github.com/llvm/llvm-project/commit/e44f4a8a54141d5f527ed8ee05362cc98031d723
DIFF: https://github.com/llvm/llvm-project/commit/e44f4a8a54141d5f527ed8ee05362cc98031d723.diff
LOG: [LoopFusion] Move instructions from FC1.GuardBlock to FC0.GuardBlock and
from FC0.ExitBlock to FC1.ExitBlock when proven safe.
Summary:
Currently LoopFusion gives up when the second loop nest's guard
block or the first loop nest's exit block is not empty. For example:
if (0 < N) {
for (int i = 0; i < N; ++i) {}
x+=1;
}
y+=1;
if (0 < N) {
for (int i = 0; i < N; ++i) {}
}
The above example should be safe to fuse.
This PR moves instructions in the FC1 guard block (e.g. y+=1;) to the
FC0 guard block, or instructions in the FC0 exit block (e.g. x+=1;) to the
FC1 exit block, after which LoopFusion is able to fuse the two loops.
Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel,
bmahjour, etiotto
Reviewed By: jdoerfert
Subscribers: hiraditya, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D73641
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopFuse.cpp
llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
llvm/test/Transforms/LoopFusion/guarded.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 24597e228870..8d591d783f2e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -91,8 +91,10 @@ STATISTIC(
"Loop has a non-empty preheader with instructions that cannot be moved");
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
-STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
-STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
+STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
+ "instructions that cannot be moved");
+STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
+ "instructions that cannot be moved");
STATISTIC(NotRotated, "Candidate is not rotated");
enum FusionDependenceAnalysisChoice {
@@ -750,25 +752,30 @@ struct LoopFuser {
continue;
}
- // The following two checks look for empty blocks in FC0 and FC1. If
- // any of these blocks are non-empty, we do not fuse. This is done
- // because we currently do not have the safety checks to determine if
- // it is safe to move the blocks past other blocks in the loop. Once
- // these checks are added, these conditions can be relaxed.
- if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
- "block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyExitBlock);
- continue;
- }
+ if (FC0->GuardBranch) {
+ assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
+
+ if (!isSafeToMoveBefore(*FC0->ExitBlock,
+ *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
+ PDT, DI)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+ "instructions in exit block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyExitBlock);
+ continue;
+ }
- if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard "
- "block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyGuardBlock);
- continue;
+ if (!isSafeToMoveBefore(
+ *FC1->GuardBranch->getParent(),
+ *FC0->GuardBranch->getParent()->getTerminator(), DT, PDT,
+ DI)) {
+ LLVM_DEBUG(dbgs()
+ << "Fusion candidate contains unsafe "
+ "instructions in guard block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyGuardBlock);
+ continue;
+ }
}
// Check the dependencies across the loops and do not fuse if it would
@@ -1079,38 +1086,6 @@ struct LoopFuser {
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
}
- /// Check that the guard for \p FC *only* contains the cmp/branch for the
- /// guard.
- /// Once we are able to handle intervening code, any code in the guard block
- /// for FC1 will need to be treated as intervening code and checked whether
- /// it can safely move around the loops.
- bool isEmptyGuardBlock(const FusionCandidate &FC) const {
- assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch.");
- if (auto *CmpInst = dyn_cast<Instruction>(FC.GuardBranch->getCondition())) {
- auto *GuardBlock = FC.GuardBranch->getParent();
- // If the generation of the cmp value is in GuardBlock, then the size of
- // the guard block should be 2 (cmp + branch). If the generation of the
- // cmp value is in a different block, then the size of the guard block
- // should only be 1.
- if (CmpInst->getParent() == GuardBlock)
- return GuardBlock->size() == 2;
- else
- return GuardBlock->size() == 1;
- }
-
- return false;
- }
-
- bool isEmptyPreheader(const FusionCandidate &FC) const {
- assert(FC.Preheader && "Expecting a valid preheader");
- return FC.Preheader->size() == 1;
- }
-
- bool isEmptyExitBlock(const FusionCandidate &FC) const {
- assert(FC.ExitBlock && "Expecting a valid exit block");
- return FC.ExitBlock->size() == 1;
- }
-
/// Simplify the condition of the latch branch of \p FC to true, when both of
/// its successors are the same.
void simplifyLatchBranch(const FusionCandidate &FC) const {
@@ -1390,6 +1365,14 @@ struct LoopFuser {
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
+ // Move instructions from the exit block of FC0 to the beginning of the exit
+ // block of FC1.
+ moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
+
+ // Move instructions from the guard block of FC1 to the end of the guard
+ // block of FC0.
+ moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI);
+
assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
index fe88f3d458b5..7bfc0e9c7b15 100644
--- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
+++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
@@ -213,6 +213,93 @@ for.second:
for.end:
ret void
}
+
+; CHECK: remark: diagnostics_missed.c:67:3: [unsafe_exitblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty exit block with instructions that cannot be moved
+define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B, i64 %N) {
+for.first.guard:
+ %cmp3 = icmp slt i64 0, %N
+ br i1 %cmp3, label %for.first.preheader, label %for.second.guard
+
+for.first.preheader:
+ br label %for.first, !dbg !83
+
+for.first:
+ %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.04, 1
+ %cmp = icmp slt i64 %inc, %N
+ br i1 %cmp, label %for.first, label %for.first.exit
+
+for.first.exit:
+ call void @bar()
+ br label %for.second.guard
+
+for.second.guard:
+ %cmp21 = icmp slt i64 0, %N
+ br i1 %cmp21, label %for.second.preheader, label %for.end
+
+for.second.preheader:
+ br label %for.second
+
+for.second:
+ %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
+ %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
+ store i32 0, i32* %arrayidx4, align 4
+ %inc6 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc6, %N
+ br i1 %cmp2, label %for.second, label %for.second.exit
+
+for.second.exit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK: remark: diagnostics_missed.c:72:3: [unsafe_guardblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty guard block with instructions that cannot be moved
+define void @unsafe_guardblock(i32* noalias %A, i32* noalias %B, i64 %N) {
+for.first.guard:
+ %cmp3 = icmp slt i64 0, %N
+ br i1 %cmp3, label %for.first.preheader, label %for.second.guard
+
+for.first.preheader:
+ br label %for.first, !dbg !86
+
+for.first:
+ %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.04, 1
+ %cmp = icmp slt i64 %inc, %N
+ br i1 %cmp, label %for.first, label %for.first.exit
+
+for.first.exit:
+ br label %for.second.guard
+
+for.second.guard:
+ call void @bar()
+ %cmp21 = icmp slt i64 0, %N
+ br i1 %cmp21, label %for.second.preheader, label %for.end
+
+for.second.preheader:
+ br label %for.second
+
+for.second:
+ %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
+ %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
+ store i32 0, i32* %arrayidx4, align 4
+ %inc6 = add nsw i64 %j.02, 1
+ %cmp2 = icmp slt i64 %inc6, %N
+ br i1 %cmp2, label %for.second, label %for.second.exit
+
+for.second.exit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
declare void @bar()
attributes #0 = { nounwind readnone speculatable willreturn }
@@ -301,3 +388,9 @@ attributes #0 = { nounwind readnone speculatable willreturn }
!78 = !{}
!79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5)
!80 = !DILocation(line: 62, column: 3, scope: !79)
+!81 = distinct !DISubprogram(name: "unsafe_exitblock", scope: !3, file: !3, line: 65, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
+!82 = distinct !DILexicalBlock(scope: !81, file: !3, line: 3, column: 5)
+!83 = !DILocation(line: 67, column: 3, scope: !82)
+!84 = distinct !DISubprogram(name: "unsafe_guardblock", scope: !3, file: !3, line: 70, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
+!85 = distinct !DILexicalBlock(scope: !84, file: !3, line: 3, column: 5)
+!86 = !DILocation(line: 72, column: 3, scope: !85)
diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll
index 9d64a39daab9..e3fc58c1f2a6 100644
--- a/llvm/test/Transforms/LoopFusion/guarded.ll
+++ b/llvm/test/Transforms/LoopFusion/guarded.ll
@@ -119,3 +119,116 @@ for.second.exit:
for.end:
ret void
}
+
+; Test that `%add` is moved in for.second.exit, and the two loops for.first
+; and for.second are fused.
+
+; CHECK: void @moveinsts_exitblock
+; CHECK-LABEL: for.first.guard:
+; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
+; CHECK-LABEL: for.first.preheader:
+; CHECK-NEXT: br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
+; CHECK-LABEL: for.second.exit:
+; CHECK-NEXT: %add = add nsw i32 %x, 1
+; CHECK-NEXT: br label %for.end
+; CHECK-LABEL: for.end:
+; CHECK-NEXT: ret void
+define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
+for.first.guard:
+ %cmp.guard = icmp slt i64 0, %N
+ br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
+
+for.first.preheader:
+ br label %for.first
+
+for.first:
+ %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.04, 1
+ %cmp = icmp slt i64 %inc, %N
+ br i1 %cmp, label %for.first, label %for.first.exit
+
+for.first.exit:
+ %add = add nsw i32 %x, 1
+ br label %for.second.guard
+
+for.second.guard:
+ br i1 %cmp.guard, label %for.second.preheader, label %for.end
+
+for.second.preheader:
+ br label %for.second
+
+for.second:
+ %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
+ %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
+ store i32 0, i32* %arrayidx4, align 4
+ %inc6 = add nsw i64 %j.02, 1
+ %cmp.j = icmp slt i64 %inc6, %N
+ br i1 %cmp.j, label %for.second, label %for.second.exit
+
+for.second.exit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; Test that `%add` is moved in for.first.guard, and the two loops for.first
+; and for.second are fused.
+
+; CHECK: void @moveinsts_guardblock
+; CHECK-LABEL: for.first.guard:
+; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
+; CHECK-NEXT: %add = add nsw i32 %x, 1
+; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
+; CHECK-LABEL: for.first.preheader:
+; CHECK-NEXT: br label %for.first
+; CHECK-LABEL: for.first:
+; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
+; CHECK-LABEL: for.second.exit:
+; CHECK-NEXT: br label %for.end
+; CHECK-LABEL: for.end:
+; CHECK-NEXT: ret void
+define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
+for.first.guard:
+ %cmp.guard = icmp slt i64 0, %N
+ br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
+
+for.first.preheader:
+ br label %for.first
+
+for.first:
+ %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
+ store i32 0, i32* %arrayidx, align 4
+ %inc = add nsw i64 %i.04, 1
+ %cmp = icmp slt i64 %inc, %N
+ br i1 %cmp, label %for.first, label %for.first.exit
+
+for.first.exit:
+ br label %for.second.guard
+
+for.second.guard:
+ %add = add nsw i32 %x, 1
+ br i1 %cmp.guard, label %for.second.preheader, label %for.end
+
+for.second.preheader:
+ br label %for.second
+
+for.second:
+ %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
+ %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
+ store i32 0, i32* %arrayidx4, align 4
+ %inc6 = add nsw i64 %j.02, 1
+ %cmp.j = icmp slt i64 %inc6, %N
+ br i1 %cmp.j, label %for.second, label %for.second.exit
+
+for.second.exit:
+ br label %for.end
+
+for.end:
+ ret void
+}
More information about the llvm-commits
mailing list