[llvm] [LoopFuse] Don't preserve SCEV analysis (PR #153547)

Madhur Amilkanthwar via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 14 00:30:13 PDT 2025


https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/153547

>From 101b07c802583cb1c8152c48c5c7d6d0c5fcc3c6 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 14 Aug 2025 00:12:35 -0700
Subject: [PATCH 1/2] [LoopFuse] Don't preserve SCEV analysis

This patch fixes an issue that occurs while verifying SCEV.
Once the loops are fused, the SCEV information is no longer valid.
Rather than recomputing it in the pass,
we should let the pass manager recompute it for
us when the next pass requests it.

When there is no fusion, all analyses are preserved.
---
 llvm/lib/Transforms/Scalar/LoopFuse.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index b5eb647a042b9..34d3abe0429ab 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -639,7 +639,6 @@ struct LoopFuser {
     assert(DT.verify());
     assert(PDT.verify());
     LI.verify(DT);
-    SE.verify();
 #endif
 
     LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
@@ -1786,7 +1785,6 @@ struct LoopFuser {
     assert(DT.verify(DominatorTree::VerificationLevel::Fast));
     assert(PDT.verify());
     LI.verify(DT);
-    SE.verify();
 #endif
 
     LLVM_DEBUG(dbgs() << "Fusion done:\n");
@@ -2082,7 +2080,6 @@ struct LoopFuser {
     assert(DT.verify(DominatorTree::VerificationLevel::Fast));
     assert(PDT.verify());
     LI.verify(DT);
-    SE.verify();
 #endif
 
     LLVM_DEBUG(dbgs() << "Fusion done:\n");
@@ -2122,7 +2119,6 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<PostDominatorTreeAnalysis>();
-  PA.preserve<ScalarEvolutionAnalysis>();
   PA.preserve<LoopAnalysis>();
   return PA;
 }

>From a4b99466769f43286d444251ccfea7700b98c937 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 14 Aug 2025 00:29:43 -0700
Subject: [PATCH 2/2] fixup! [LoopFuse] Don't preserve SCEV analysis

Add test case
---
 .../test/Transforms/LoopFusion/loop-guards.ll | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopFusion/loop-guards.ll

diff --git a/llvm/test/Transforms/LoopFusion/loop-guards.ll b/llvm/test/Transforms/LoopFusion/loop-guards.ll
new file mode 100644
index 0000000000000..90f1e26540083
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/loop-guards.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
+
+; This test would fail if SCEV analysis is marked as preserved.
+
+define i32 @loop_guards() {
+; CHECK-LABEL: define i32 @loop_guards() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[VLA48:%.*]] = alloca [1024 x i32], align 16
+; CHECK-NEXT:    [[VLA149:%.*]] = alloca [1024 x i32], align 16
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @rand()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 100
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[REM]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_BODY4_PREHEADER:.*]], label %[[FOR_BODY]]
+; CHECK:       [[FOR_BODY4_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_BODY4:.*]]
+; CHECK:       [[FOR_BODY4]]:
+; CHECK-NEXT:    [[INDVARS_IV54:%.*]] = phi i64 [ [[INDVARS_IV_NEXT55:%.*]], %[[FOR_INC26:.*]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVARS_IV58:%.*]] = phi i64 [ [[INDVARS_IV_NEXT59:%.*]], %[[FOR_INC26]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV54]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[VLA149]], i64 [[INDVARS_IV54]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[VLA48]], i64 [[INDVARS_IV58]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT:    [[CMP18:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[CMP18]], label %[[IF_THEN:.*]], label %[[FOR_INC26]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[MUL23:%.*]] = mul nsw i32 [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA149]], i64 [[INDVARS_IV58]]
+; CHECK-NEXT:    store i32 [[MUL23]], ptr [[ARRAYIDX25]], align 4
+; CHECK-NEXT:    br label %[[FOR_INC26]]
+; CHECK:       [[FOR_INC26]]:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT55]] = add nuw nsw i64 [[INDVARS_IV54]], 1
+; CHECK-NEXT:    [[EXITCOND57_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT55]], 1024
+; CHECK-NEXT:    [[INDVARS_IV_NEXT59]] = add nuw nsw i64 [[INDVARS_IV58]], 1
+; CHECK-NEXT:    [[EXITCOND61_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT59]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND61_NOT]], label %[[FOR_END28:.*]], label %[[FOR_BODY4]]
+; CHECK:       [[FOR_END28]]:
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds i8, ptr [[VLA149]], i64 4092
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX30]], align 4
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %vla48 = alloca [1024 x i32], align 16
+  %vla149 = alloca [1024 x i32], align 16
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %call = tail call i32 @rand() #2
+  %rem = srem i32 %call, 100
+  %add = add nsw i32 %rem, 1
+  %arrayidx = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond.not, label %for.body4, label %for.body
+
+for.body4:
+  %indvars.iv54 = phi i64 [ %indvars.iv.next55, %for.body4 ], [ 0, %for.body ]
+  %arrayidx6 = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv54
+  %0 = load i32, ptr %arrayidx6, align 4
+  %mul = shl nsw i32 %0, 1
+  %arrayidx8 = getelementptr inbounds i32, ptr %vla149, i64 %indvars.iv54
+  store i32 %mul, ptr %arrayidx8, align 4
+  %indvars.iv.next55 = add nuw nsw i64 %indvars.iv54, 1
+  %exitcond57.not = icmp eq i64 %indvars.iv.next55, 1024
+  br i1 %exitcond57.not, label %for.body14, label %for.body4
+
+for.body14:
+  %indvars.iv58 = phi i64 [ %indvars.iv.next59, %for.inc26 ], [ 0, %for.body4 ]
+  %arrayidx16 = getelementptr inbounds i32, ptr %vla48, i64 %indvars.iv58
+  %1 = load i32, ptr %arrayidx16, align 4
+  %2 = and i32 %1, 1
+  %cmp18 = icmp eq i32 %2, 0
+  br i1 %cmp18, label %if.then, label %for.inc26
+
+if.then:
+  %mul23 = mul nsw i32 %1, %1
+  %arrayidx25 = getelementptr inbounds i32, ptr %vla149, i64 %indvars.iv58
+  store i32 %mul23, ptr %arrayidx25, align 4
+  br label %for.inc26
+
+for.inc26:
+  %indvars.iv.next59 = add nuw nsw i64 %indvars.iv58, 1
+  %exitcond61.not = icmp eq i64 %indvars.iv.next59, 1024
+  br i1 %exitcond61.not, label %for.end28, label %for.body14
+
+for.end28:
+  %arrayidx30 = getelementptr inbounds i8, ptr %vla149, i64 4092
+  %3 = load i32, ptr %arrayidx30, align 4
+  ret i32 %3
+}
+
+declare i32 @rand()
+



More information about the llvm-commits mailing list