[llvm] [LoopFuse] Don't preserve SCEV analysis (PR #153547)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 03:28:23 PDT 2025
https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/153547
>From 101b07c802583cb1c8152c48c5c7d6d0c5fcc3c6 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 14 Aug 2025 00:12:35 -0700
Subject: [PATCH 1/2] [LoopFuse] Don't preserve SCEV analysis
This patch fixes an issue while verifying SCEV.
As the loops are fused, SCEV information is no longer valid,
and rather than recomputing it in the pass,
we should prefer to let the pass manager recompute it for
us when the next pass requests it.
When there is no fusion, all analyses are preserved.
---
llvm/lib/Transforms/Scalar/LoopFuse.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index b5eb647a042b9..34d3abe0429ab 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -639,7 +639,6 @@ struct LoopFuser {
assert(DT.verify());
assert(PDT.verify());
LI.verify(DT);
- SE.verify();
#endif
LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
@@ -1786,7 +1785,6 @@ struct LoopFuser {
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
assert(PDT.verify());
LI.verify(DT);
- SE.verify();
#endif
LLVM_DEBUG(dbgs() << "Fusion done:\n");
@@ -2082,7 +2080,6 @@ struct LoopFuser {
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
assert(PDT.verify());
LI.verify(DT);
- SE.verify();
#endif
LLVM_DEBUG(dbgs() << "Fusion done:\n");
@@ -2122,7 +2119,6 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<PostDominatorTreeAnalysis>();
- PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<LoopAnalysis>();
return PA;
}
>From e1bd5cd800883f23259e06740ab2db939539de79 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 14 Aug 2025 00:29:43 -0700
Subject: [PATCH 2/2] fixup! [LoopFuse] Don't preserve SCEV analysis
Add test case
---
llvm/lib/Transforms/Scalar/LoopFuse.cpp | 6 +-
.../test/Transforms/LoopFusion/loop-guards.ll | 106 ++++++++++++++++++
2 files changed, 109 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/LoopFusion/loop-guards.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 34d3abe0429ab..536856ea3869d 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -314,7 +314,7 @@ struct FusionCandidate {
/// Determine if a fusion candidate (representing a loop) is eligible for
/// fusion. Note that this only checks whether a single loop can be fused - it
/// does not check whether it is *legal* to fuse two loops together.
- bool isEligibleForFusion(ScalarEvolution &SE) const {
+ bool isEligibleForFusion(ScalarEvolution &SE, bool VerifySCEV = true) const {
if (!isValid()) {
LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
if (!Preheader)
@@ -334,7 +334,7 @@ struct FusionCandidate {
}
// Require ScalarEvolution to be able to determine a trip count.
- if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+ if (VerifySCEV && !SE.hasLoopInvariantBackedgeTakenCount(L)) {
LLVM_DEBUG(dbgs() << "Loop " << L->getName()
<< " trip count not computable!\n");
return reportInvalidCandidate(UnknownTripCount);
@@ -1035,7 +1035,7 @@ struct LoopFuser {
performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
FC0Copy.PP);
FusedCand.verify();
- assert(FusedCand.isEligibleForFusion(SE) &&
+ assert(FusedCand.isEligibleForFusion(SE, false) &&
"Fused candidate should be eligible for fusion!");
// Notify the loop-depth-tree that these loops are not valid objects
diff --git a/llvm/test/Transforms/LoopFusion/loop-guards.ll b/llvm/test/Transforms/LoopFusion/loop-guards.ll
new file mode 100644
index 0000000000000..90f1e26540083
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/loop-guards.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
+
+; This test would fail if SCEV analysis is marked as preserved.
+
+define i32 @loop_guards() {
+; CHECK-LABEL: define i32 @loop_guards() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[VLA48:%.*]] = alloca [1024 x i32], align 16
+; CHECK-NEXT: [[VLA149:%.*]] = alloca [1024 x i32], align 16
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @rand()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 100
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[REM]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_BODY4_PREHEADER:.*]], label %[[FOR_BODY]]
+; CHECK: [[FOR_BODY4_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY4:.*]]
+; CHECK: [[FOR_BODY4]]:
+; CHECK-NEXT: [[INDVARS_IV54:%.*]] = phi i64 [ [[INDVARS_IV_NEXT55:%.*]], %[[FOR_INC26:.*]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ]
+; CHECK-NEXT: [[INDVARS_IV58:%.*]] = phi i64 [ [[INDVARS_IV_NEXT59:%.*]], %[[FOR_INC26]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV54]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP0]], 1
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[VLA149]], i64 [[INDVARS_IV54]]
+; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV58]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT: [[CMP18:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[CMP18]], label %[[IF_THEN:.*]], label %[[FOR_INC26]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[MUL23:%.*]] = mul nsw i32 [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA149]], i64 [[INDVARS_IV58]]
+; CHECK-NEXT: store i32 [[MUL23]], ptr [[ARRAYIDX25]], align 4
+; CHECK-NEXT: br label %[[FOR_INC26]]
+; CHECK: [[FOR_INC26]]:
+; CHECK-NEXT: [[INDVARS_IV_NEXT55]] = add nuw nsw i64 [[INDVARS_IV54]], 1
+; CHECK-NEXT: [[EXITCOND57_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT55]], 1024
+; CHECK-NEXT: [[INDVARS_IV_NEXT59]] = add nuw nsw i64 [[INDVARS_IV58]], 1
+; CHECK-NEXT: [[EXITCOND61_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT59]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND61_NOT]], label %[[FOR_END28:.*]], label %[[FOR_BODY4]]
+; CHECK: [[FOR_END28]]:
+; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i8, ptr [[VLA149]], i64 4092
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX30]], align 4
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+entry:
+ %vla48 = alloca [1024 x i32], align 16
+ %vla149 = alloca [1024 x i32], align 16
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %call = tail call i32 @rand() #2
+ %rem = srem i32 %call, 100
+ %add = add nsw i32 %rem, 1
+ %arrayidx = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv
+ store i32 %add, ptr %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.body4, label %for.body
+
+for.body4:
+ %indvars.iv54 = phi i64 [ %indvars.iv.next55, %for.body4 ], [ 0, %for.body ]
+ %arrayidx6 = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv54
+ %0 = load i32, ptr %arrayidx6, align 4
+ %mul = shl nsw i32 %0, 1
+ %arrayidx8 = getelementptr inbounds i32, ptr %vla149, i64 %indvars.iv54
+ store i32 %mul, ptr %arrayidx8, align 4
+ %indvars.iv.next55 = add nuw nsw i64 %indvars.iv54, 1
+ %exitcond57.not = icmp eq i64 %indvars.iv.next55, 1024
+ br i1 %exitcond57.not, label %for.body14, label %for.body4
+
+for.body14:
+ %indvars.iv58 = phi i64 [ %indvars.iv.next59, %for.inc26 ], [ 0, %for.body4 ]
+ %arrayidx16 = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv58
+ %1 = load i32, ptr %arrayidx16, align 4
+ %2 = and i32 %1, 1
+ %cmp18 = icmp eq i32 %2, 0
+ br i1 %cmp18, label %if.then, label %for.inc26
+
+if.then:
+ %mul23 = mul nsw i32 %1, %1
+ %arrayidx25 = getelementptr inbounds i32, ptr %vla149, i64 %indvars.iv58
+ store i32 %mul23, ptr %arrayidx25, align 4
+ br label %for.inc26
+
+for.inc26:
+ %indvars.iv.next59 = add nuw nsw i64 %indvars.iv58, 1
+ %exitcond61.not = icmp eq i64 %indvars.iv.next59, 1024
+ br i1 %exitcond61.not, label %for.end28, label %for.body14
+
+for.end28:
+ %arrayidx30 = getelementptr inbounds i8, ptr %vla149, i64 4092
+ %3 = load i32, ptr %arrayidx30, align 4
+ ret i32 %3
+}
+
+declare i32 @rand()
+
More information about the llvm-commits
mailing list