[llvm-branch-commits] [llvm] [mlir] Release/14.x (PR #104042)
Shravan Kumar via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 14 08:36:37 PDT 2024
https://github.com/shravankumar0811 created https://github.com/llvm/llvm-project/pull/104042
None
>From cfd62625658626c24e9549fa5c6e07aadfe2d792 Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Tue, 28 Jun 2022 17:34:18 +0300
Subject: [PATCH 1/8] Adding cfcss pass
---
llvm/lib/Transforms/CMakeLists.txt | 1 +
llvm/lib/Transforms/Cfcss/CMakeLists.txt | 20 +++
llvm/lib/Transforms/Cfcss/Cfcss.cpp | 165 ++++++++++++++++++++
llvm/lib/Transforms/Cfcss/Cfscc.exports | 0
tests/CMakeLists.txt | 1 +
tests/cfcss/cfcss.c | 10 ++
tests/cfcss/cfcss.ll | 65 ++++++++
tests/cfcss/command.sh | 2 +
tests/cfcss/out1_cfcss.ll | 186 +++++++++++++++++++++++
tests/cfcss/out_cfcss.ll | 104 +++++++++++++
10 files changed, 554 insertions(+)
create mode 100644 llvm/lib/Transforms/Cfcss/CMakeLists.txt
create mode 100644 llvm/lib/Transforms/Cfcss/Cfcss.cpp
create mode 100644 llvm/lib/Transforms/Cfcss/Cfscc.exports
create mode 100644 tests/CMakeLists.txt
create mode 100644 tests/cfcss/cfcss.c
create mode 100644 tests/cfcss/cfcss.ll
create mode 100644 tests/cfcss/command.sh
create mode 100644 tests/cfcss/out1_cfcss.ll
create mode 100644 tests/cfcss/out_cfcss.ll
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index dda5f6de11e326..5ed9ca62265fe8 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -9,3 +9,4 @@ add_subdirectory(Hello)
add_subdirectory(ObjCARC)
add_subdirectory(Coroutines)
add_subdirectory(CFGuard)
+add_subdirectory(Cfcss)
diff --git a/llvm/lib/Transforms/Cfcss/CMakeLists.txt b/llvm/lib/Transforms/Cfcss/CMakeLists.txt
new file mode 100644
index 00000000000000..4dc70e819e0cf7
--- /dev/null
+++ b/llvm/lib/Transforms/Cfcss/CMakeLists.txt
@@ -0,0 +1,20 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the Cfcss plugin.
+if( NOT LLVM_REQUIRES_RTTI )
+ if( NOT LLVM_REQUIRES_EH )
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Cfscc.exports)
+ endif()
+endif()
+
+if(WIN32 OR CYGWIN)
+ set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
+add_llvm_library( LLVMCfcss MODULE BUILDTREE_ONLY
+ Cfcss.cpp
+
+ DEPENDS
+ intrinsics_gen
+ PLUGIN_TOOL
+ opt
+ )
diff --git a/llvm/lib/Transforms/Cfcss/Cfcss.cpp b/llvm/lib/Transforms/Cfcss/Cfcss.cpp
new file mode 100644
index 00000000000000..5bdcd478b1b9bb
--- /dev/null
+++ b/llvm/lib/Transforms/Cfcss/Cfcss.cpp
@@ -0,0 +1,165 @@
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+// Cfcss - Legacy-PM module pass that instruments every defined function with
+// Control Flow Checking by Software Signatures (CFCSS).
+//
+// For each instrumented function the pass:
+//   * creates two internal i32 globals, "G" (run-time signature) and "D"
+//     (run-time adjusting signature);
+//   * gives every basic block a unique compile-time signature 1, 2, 3, ...;
+//   * at the top of every block that has predecessors, updates G by XOR-ing
+//     it with a precomputed signature difference (and with D for blocks that
+//     have two or more predecessors) and compares the result with the
+//     block's own signature;
+//   * branches to a per-function "ErrorBlock", which calls __cfcss_error(),
+//     when the comparison fails.
+//
+// NOTE(review): instrumentation is inserted at getFirstNonPHI(); blocks that
+// start with an EH pad are presumably not expected as input -- confirm.
+struct Cfcss : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  Cfcss() : ModulePass(ID) {}
+
+  // Instruments all eligible functions of M. Returns true when at least one
+  // function was instrumented (i.e. the module was modified).
+  bool runOnModule(Module &M) override {
+    bool Changed = false;
+    LLVMContext &Ctx = M.getContext();
+
+    for (Function &F : M) {
+      // Declarations have no entry block to instrument; touching F.begin()
+      // on them would dereference the end iterator.
+      if (F.isDeclaration())
+        continue;
+      // Never instrument the error handler itself, nor module-local
+      // definitions of the libc routines it relies on.
+      if (F.getName() == "__cfcss_error" || F.getName() == "printf" ||
+          F.getName() == "exit")
+        continue;
+
+      IRBuilder<> Builder(F.begin()->getFirstNonPHI());
+      Type *Int32Ty = Builder.getInt32Ty();
+
+      // G holds the run-time signature, D the run-time adjusting signature.
+      GlobalVariable *GV = new GlobalVariable(
+          M, Int32Ty, /*isConstant=*/false, GlobalValue::InternalLinkage,
+          Builder.getInt32(0), "G");
+      GlobalVariable *Dg = new GlobalVariable(
+          M, Int32Ty, /*isConstant=*/false, GlobalValue::InternalLinkage,
+          Builder.getInt32(0), "D");
+
+      FunctionCallee ErrorFunc =
+          M.getOrInsertFunction("__cfcss_error", Builder.getVoidTy());
+
+      // Assign signatures 1..N in layout order. The blocks are snapshotted
+      // because the instrumentation below splits blocks and appends the
+      // error block, none of which must be visited again.
+      SmallVector<BasicBlock *> Blocks;
+      DenseMap<BasicBlock *, int> SigMap;
+      DenseMap<BasicBlock *, int> Dsig;
+      int SigCount = 1;
+      for (BasicBlock &BB : F) {
+        SigMap[&BB] = SigCount++;
+        Blocks.push_back(&BB);
+      }
+
+      // The entry block has signature 1, so G starts out as 1.
+      Builder.CreateStore(Builder.getInt32(1), GV);
+
+      // Dsig(BB) = sig(BB) ^ sig(base predecessor). For fan-in blocks the
+      // first predecessor is the base, and every predecessor publishes
+      // D = sig(base) ^ sig(pred) so that the check in BB still works.
+      for (BasicBlock &BB : F) {
+        if (BB.hasNPredecessors(1))
+          Dsig[&BB] =
+              SigMap.lookup(&BB) ^ SigMap.lookup(BB.getSinglePredecessor());
+
+        if (BB.hasNPredecessorsOrMore(2)) {
+          BasicBlock *BasePred = *pred_begin(&BB);
+          Dsig[&BB] = SigMap.lookup(&BB) ^ SigMap.lookup(BasePred);
+          for (BasicBlock *Pred : predecessors(&BB)) {
+            Builder.SetInsertPoint(Pred->getFirstNonPHI());
+            Builder.CreateStore(
+                Builder.getInt32(SigMap.lookup(BasePred) ^ SigMap.lookup(Pred)),
+                Dg);
+          }
+        }
+      }
+
+      // Block that reports a signature mismatch. It returns the zero value
+      // of the function's return type so the IR stays valid for functions
+      // that do not return i32.
+      BasicBlock *ErrorBlock = BasicBlock::Create(Ctx, "ErrorBlock", &F);
+      Builder.SetInsertPoint(ErrorBlock);
+      Builder.CreateCall(ErrorFunc);
+      Type *RetTy = F.getReturnType();
+      if (RetTy->isVoidTy())
+        Builder.CreateRetVoid();
+      else
+        Builder.CreateRet(Constant::getNullValue(RetTy));
+
+      // Emit the signature update and check at the top of every original
+      // block. Blocks without predecessors (the entry block) need no check:
+      // G was initialised to their signature above.
+      for (BasicBlock *BB : Blocks) {
+        const bool FanIn = BB->hasNPredecessorsOrMore(2);
+        if (!FanIn && !BB->hasNPredecessors(1))
+          continue;
+
+        Builder.SetInsertPoint(BB->getFirstNonPHI());
+        LoadInst *CurG = Builder.CreateLoad(Int32Ty, GV);
+        LoadInst *CurD = FanIn ? Builder.CreateLoad(Int32Ty, Dg) : nullptr;
+
+        // G = G ^ dsig, and additionally G = G ^ D for fan-in blocks.
+        Value *NewG = Builder.CreateXor(CurG, Builder.getInt32(Dsig.lookup(BB)));
+        if (CurD)
+          NewG = Builder.CreateXor(NewG, CurD);
+        Builder.CreateStore(NewG, GV);
+
+        Value *Fail = Builder.CreateICmpNE(
+            NewG, Builder.getInt32(SigMap.lookup(BB)), "failure");
+
+        // Split right after the compare: the head keeps the check and ends
+        // in a conditional branch, the tail keeps the original body.
+        BasicBlock *Tail = BB->splitBasicBlock(
+            cast<Instruction>(Fail)->getNextNode(), "split");
+        BB->getTerminator()->eraseFromParent();
+        Builder.SetInsertPoint(BB);
+        Builder.CreateCondBr(Fail, ErrorBlock, Tail);
+      }
+
+      Changed = true;
+    }
+
+    return Changed;
+  }
+
+  // The pass adds globals and rewrites the CFG of every instrumented
+  // function, so it does not declare any analysis as preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    ModulePass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+char Cfcss::ID = 0;
+static RegisterPass<Cfcss> Y("cfcss",
+ "Cfcss Pass (with getAnalysisUsage implemented)");
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Cfcss/Cfscc.exports b/llvm/lib/Transforms/Cfcss/Cfscc.exports
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000000000..9569308eb4a637
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(cfcss)
diff --git a/tests/cfcss/cfcss.c b/tests/cfcss/cfcss.c
new file mode 100644
index 00000000000000..e607f2d32120ad
--- /dev/null
+++ b/tests/cfcss/cfcss.c
@@ -0,0 +1,10 @@
+#include<stdio.h>
+#include<stdlib.h>
+void __cfcss_error() { // Handler that the Cfcss pass calls from its ErrorBlock when a signature check fails.
+ printf(" Signatures do not match");
+ exit(0); // NOTE(review): terminates with status 0 although this is the failure path -- confirm this is intended.
+}
+int main() { // Test input for the Cfcss pass: the loop yields a block (the loop header) with two predecessors.
+ for (int i = 0; i < 10; i++)
+ printf(" Value is %d", i);
+}
diff --git a/tests/cfcss/cfcss.ll b/tests/cfcss/cfcss.ll
new file mode 100644
index 00000000000000..143d95b3db3901
--- /dev/null
+++ b/tests/cfcss/cfcss.ll
@@ -0,0 +1,65 @@
+; ModuleID = 'cfcss.c'
+source_filename = "cfcss.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [25 x i8] c" Signatures do not match\00", align 1
+ at .str.1 = private unnamed_addr constant [13 x i8] c" Value is %d\00", align 1
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @__cfcss_error() #0 {
+entry:
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i64 0, i64 0))
+ call void @exit(i32 noundef 0) #3
+ unreachable
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @exit(i32 noundef) #2
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([13 x i8], [13 x i8]* @.str.1, i64 0, i64 0), i32 noundef %1)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %2, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %3 = load i32, i32* %retval, align 4
+ ret i32 %3
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { noreturn nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
diff --git a/tests/cfcss/command.sh b/tests/cfcss/command.sh
new file mode 100644
index 00000000000000..496ba116a2ebe6
--- /dev/null
+++ b/tests/cfcss/command.sh
@@ -0,0 +1,2 @@
+# Regenerates cfcss.ll from cfcss.c and runs the legacy-PM Cfcss plugin on it.
+# Requires LLVM_HOME to point at the LLVM checkout that contains build/lib.
+# Abort on the first failing command or on an unset LLVM_HOME, so that opt
+# never runs against a stale cfcss.ll or a bogus plugin path.
+set -eu
+clang -O0 -S -emit-llvm cfcss.c -o cfcss.ll
+opt -load "${LLVM_HOME}/build/lib/LLVMCfcss.so" -enable-new-pm=0 -cfcss cfcss.ll -S -o out_cfcss.ll
diff --git a/tests/cfcss/out1_cfcss.ll b/tests/cfcss/out1_cfcss.ll
new file mode 100644
index 00000000000000..dbde32637c6907
--- /dev/null
+++ b/tests/cfcss/out1_cfcss.ll
@@ -0,0 +1,186 @@
+; ModuleID = 'out_cfcss.ll'
+source_filename = "cfcss.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [25 x i8] c" Signatures do not match\00", align 1
+ at .str.1 = private unnamed_addr constant [13 x i8] c" Value is %d\00", align 1
+ at G = internal global i32 0
+ at D = internal global i32 0
+ at G.1 = internal global i32 0
+ at D.2 = internal global i32 0
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @__cfcss_error() #0 {
+entry:
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str)
+ call void @exit(i32 noundef 0) #3
+ unreachable
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: noreturn nounwind
+declare void @exit(i32 noundef) #2
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @main() #0 {
+entry:
+ store i32 6, ptr @D.2, align 4
+ store i32 1, ptr @G.1, align 4
+ store i32 5, ptr @D, align 4
+ store i32 1, ptr @G, align 4
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, ptr %retval, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %split13, %entry
+ %0 = load i32, ptr @G.1, align 4
+ %1 = load i32, ptr @D.2, align 4
+ %2 = xor i32 %0, 5
+ %3 = xor i32 %2, %1
+ store i32 %3, ptr @G.1, align 4
+ %failure2 = icmp ne i32 %3, 2
+ br i1 %failure2, label %ErrorBlock1, label %split3
+
+split3: ; preds = %for.cond
+ store i32 10, ptr @D.2, align 4
+ %4 = load i32, ptr @G, align 4
+ %5 = load i32, ptr @D, align 4
+ %6 = xor i32 %4, 6
+ %7 = xor i32 %6, %5
+ store i32 %7, ptr @G, align 4
+ %failure = icmp ne i32 %7, 2
+ br i1 %failure, label %ErrorBlock, label %split
+
+split: ; preds = %split3
+ %8 = load i32, ptr @G.1, align 4
+ %9 = xor i32 %8, 1
+ store i32 %9, ptr @G.1, align 4
+ %failure4 = icmp ne i32 %9, 3
+ br i1 %failure4, label %ErrorBlock1, label %split5
+
+split5: ; preds = %split
+ %10 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %10, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %split5
+ %11 = load i32, ptr @G.1, align 4
+ %12 = xor i32 %11, 7
+ store i32 %12, ptr @G.1, align 4
+ %failure6 = icmp ne i32 %12, 4
+ br i1 %failure6, label %ErrorBlock1, label %split7
+
+split7: ; preds = %for.body
+ store i32 12, ptr @D.2, align 4
+ %13 = load i32, ptr @G, align 4
+ %14 = xor i32 %13, 1
+ store i32 %14, ptr @G, align 4
+ %failure1 = icmp ne i32 %14, 3
+ br i1 %failure1, label %ErrorBlock, label %split2
+
+split2: ; preds = %split7
+ %15 = load i32, ptr @G.1, align 4
+ %16 = xor i32 %15, 1
+ store i32 %16, ptr @G.1, align 4
+ %failure8 = icmp ne i32 %16, 5
+ br i1 %failure8, label %ErrorBlock1, label %split9
+
+split9: ; preds = %split2
+ %17 = load i32, ptr %i, align 4
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str.1, i32 noundef %17)
+ br label %for.inc
+
+for.inc: ; preds = %split9
+ %18 = load i32, ptr @G.1, align 4
+ %19 = xor i32 %18, 3
+ store i32 %19, ptr @G.1, align 4
+ %failure10 = icmp ne i32 %19, 6
+ br i1 %failure10, label %ErrorBlock1, label %split11
+
+split11: ; preds = %for.inc
+ store i32 14, ptr @D.2, align 4
+ %20 = load i32, ptr @G, align 4
+ %21 = xor i32 %20, 10
+ store i32 %21, ptr @G, align 4
+ %failure3 = icmp ne i32 %21, 4
+ br i1 %failure3, label %ErrorBlock, label %split4
+
+split4: ; preds = %split11
+ %22 = load i32, ptr @G.1, align 4
+ %23 = xor i32 %22, 1
+ store i32 %23, ptr @G.1, align 4
+ %failure12 = icmp ne i32 %23, 7
+ br i1 %failure12, label %ErrorBlock1, label %split13
+
+split13: ; preds = %split4
+ store i32 0, ptr @D.2, align 4
+ store i32 0, ptr @D, align 4
+ %24 = load i32, ptr %i, align 4
+ %inc = add nsw i32 %24, 1
+ store i32 %inc, ptr %i, align 4
+ br label %for.cond, !llvm.loop !6
+
+for.end: ; preds = %split5
+ %25 = load i32, ptr @G.1, align 4
+ %26 = xor i32 %25, 11
+ store i32 %26, ptr @G.1, align 4
+ %failure14 = icmp ne i32 %26, 8
+ br i1 %failure14, label %ErrorBlock1, label %split15
+
+split15: ; preds = %for.end
+ store i32 0, ptr @D.2, align 4
+ %27 = load i32, ptr @G, align 4
+ %28 = xor i32 %27, 7
+ store i32 %28, ptr @G, align 4
+ %failure5 = icmp ne i32 %28, 5
+ br i1 %failure5, label %ErrorBlock, label %split6
+
+split6: ; preds = %split15
+ %29 = load i32, ptr @G.1, align 4
+ %30 = xor i32 %29, 1
+ store i32 %30, ptr @G.1, align 4
+ %failure16 = icmp ne i32 %30, 9
+ br i1 %failure16, label %ErrorBlock1, label %split17
+
+split17: ; preds = %split6
+ %31 = load i32, ptr %retval, align 4
+ ret i32 %31
+
+ErrorBlock: ; preds = %split15, %split11, %split7, %split3
+ %32 = load i32, ptr @G.1, align 4
+ %33 = load i32, ptr @D.2, align 4
+ %34 = xor i32 %32, 2
+ %35 = xor i32 %34, %33
+ store i32 %35, ptr @G.1, align 4
+ %failure18 = icmp ne i32 %35, 10
+ br i1 %failure18, label %ErrorBlock1, label %split19
+
+split19: ; preds = %ErrorBlock
+ call void @__cfcss_error()
+ ret i32 0
+
+ErrorBlock1: ; preds = %ErrorBlock, %split6, %for.end, %split4, %for.inc, %split2, %for.body, %split, %for.cond
+ call void @__cfcss_error()
+ ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { noreturn nounwind }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!"clang version 15.0.0 (https://shravan_kumar0826@bitbucket.org/shravan_kumar0826/llvm-project.git 00bb96a3bfe1901661abfdb27177c1ba6c6920c6)"}
+!6 = distinct !{!6, !7}
+!7 = !{!"llvm.loop.mustprogress"}
diff --git a/tests/cfcss/out_cfcss.ll b/tests/cfcss/out_cfcss.ll
new file mode 100644
index 00000000000000..d2ea7e9baa5370
--- /dev/null
+++ b/tests/cfcss/out_cfcss.ll
@@ -0,0 +1,104 @@
+; ModuleID = 'cfcss.ll'
+source_filename = "cfcss.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [25 x i8] c" Signatures do not match\00", align 1
+ at .str.1 = private unnamed_addr constant [13 x i8] c" Value is %d\00", align 1
+ at G = internal global i32 0
+ at D = internal global i32 0
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @__cfcss_error() #0 {
+entry:
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i64 0, i64 0))
+ call void @exit(i32 noundef 0) #3
+ unreachable
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @exit(i32 noundef) #2
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @main() #0 {
+entry:
+ store i32 5, i32* @D, align 4
+ store i32 1, i32* @G, align 4
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %split4, %entry
+ %0 = load i32, i32* @G, align 4
+ %1 = load i32, i32* @D, align 4
+ %2 = xor i32 %0, 6
+ %3 = xor i32 %2, %1
+ store i32 %3, i32* @G, align 4
+ %failure = icmp ne i32 %3, 2
+ br i1 %failure, label %ErrorBlock, label %split
+
+split: ; preds = %for.cond
+ %4 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %4, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %split
+ %5 = load i32, i32* @G, align 4
+ %6 = xor i32 %5, 1
+ store i32 %6, i32* @G, align 4
+ %failure1 = icmp ne i32 %6, 3
+ br i1 %failure1, label %ErrorBlock, label %split2
+
+split2: ; preds = %for.body
+ %7 = load i32, i32* %i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([13 x i8], [13 x i8]* @.str.1, i64 0, i64 0), i32 noundef %7)
+ br label %for.inc
+
+for.inc: ; preds = %split2
+ %8 = load i32, i32* @G, align 4
+ %9 = xor i32 %8, 7
+ store i32 %9, i32* @G, align 4
+ %failure3 = icmp ne i32 %9, 4
+ br i1 %failure3, label %ErrorBlock, label %split4
+
+split4: ; preds = %for.inc
+ store i32 0, i32* @D, align 4
+ %10 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %split
+ %11 = load i32, i32* @G, align 4
+ %12 = xor i32 %11, 7
+ store i32 %12, i32* @G, align 4
+ %failure5 = icmp ne i32 %12, 5
+ br i1 %failure5, label %ErrorBlock, label %split6
+
+split6: ; preds = %for.end
+ %13 = load i32, i32* %retval, align 4
+ ret i32 %13
+
+ErrorBlock: ; preds = %for.end, %for.inc, %for.body, %for.cond
+ call void @__cfcss_error()
+ ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { noreturn nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
>From 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Tue, 28 Jun 2022 19:18:42 +0300
Subject: [PATCH 2/8] Adding assignment1 answers
---
llvm/lib/Transforms/Hello/Hello.cpp | 218 ++++++++++++++++++++++++----
tests/CMakeLists.txt | 1 +
tests/assignment1/1.c | 6 +
tests/assignment1/1.ll | 42 ++++++
tests/assignment1/2.ll | 48 ++++++
tests/assignment1/command.sh | 2 +
6 files changed, 286 insertions(+), 31 deletions(-)
create mode 100644 tests/assignment1/1.c
create mode 100644 tests/assignment1/1.ll
create mode 100644 tests/assignment1/2.ll
create mode 100644 tests/assignment1/command.sh
diff --git a/llvm/lib/Transforms/Hello/Hello.cpp b/llvm/lib/Transforms/Hello/Hello.cpp
index b0adb5401f8912..00c43b44443ae5 100644
--- a/llvm/lib/Transforms/Hello/Hello.cpp
+++ b/llvm/lib/Transforms/Hello/Hello.cpp
@@ -12,7 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -22,43 +25,196 @@ using namespace llvm;
STATISTIC(HelloCounter, "Counts number of functions greeted");
namespace {
- // Hello - The first implementation, without getAnalysisUsage.
- struct Hello : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- Hello() : FunctionPass(ID) {}
-
- bool runOnFunction(Function &F) override {
- ++HelloCounter;
- errs() << "Hello: ";
- errs().write_escaped(F.getName()) << '\n';
- return false;
- }
- };
-}
+// Hello - The first implementation, without getAnalysisUsage.
+struct Hello : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override {
+ ++HelloCounter;
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
+ return false;
+ }
+};
+} // namespace
char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Hello World Pass");
namespace {
- // Hello2 - The second implementation with getAnalysisUsage implemented.
- struct Hello2 : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- Hello2() : FunctionPass(ID) {}
-
- bool runOnFunction(Function &F) override {
- ++HelloCounter;
- errs() << "Hello: ";
- errs().write_escaped(F.getName()) << '\n';
- return false;
- }
+// Hello2 - The second implementation with getAnalysisUsage implemented.
+struct Hello2 : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello2() : FunctionPass(ID) {}
- // We don't modify the program, so we preserve all analyses.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-}
+ bool runOnFunction(Function &F) override {
+ ++HelloCounter;
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
+ return false;
+ }
+
+ // We don't modify the program, so we preserve all analyses.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+} // namespace
char Hello2::ID = 0;
static RegisterPass<Hello2>
-Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
+ Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
+
+namespace {
+// MyHello - Function pass that prints CFG statistics for each function
+// (instruction count, block count, and the blocks with the most
+// instructions, predecessors and successors) and then tags every basic block
+// by storing its 0-based position into a new internal i32 global "G".
+//
+// NOTE(review): creating a GlobalVariable from a FunctionPass modifies the
+// enclosing module; kept as in the original assignment -- confirm this is
+// acceptable for the intended use.
+struct MyHello : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  MyHello() : FunctionPass(ID) {}
+
+  // Prints the statistics for F to errs() and instruments F.
+  // Always returns true because a store is added to every block.
+  bool runOnFunction(Function &F) override {
+    // All statistics are per function, so they live in locals; keeping them
+    // as members would make the numbers accumulate across functions.
+    int Count = 0;
+    int CountBb = 0;
+
+    int CurrentMax = 0;
+    int CurrentPredMax = 0;
+    int CurrentSuccMax = 0;
+    StringRef Maax;
+    StringRef MaaxPred;
+    StringRef MaaxSucc;
+
+    // Gather everything in one walk, before any instruction is inserted.
+    // On ties the first block in layout order wins.
+    for (BasicBlock &BB : F) {
+      ++CountBb;
+
+      const int NumInsts = static_cast<int>(BB.size());
+      Count += NumInsts;
+      if (NumInsts > CurrentMax) {
+        CurrentMax = NumInsts;
+        Maax = BB.getName();
+      }
+
+      const int NumPreds = static_cast<int>(pred_size(&BB));
+      if (NumPreds > CurrentPredMax) {
+        CurrentPredMax = NumPreds;
+        MaaxPred = BB.getName();
+      }
+
+      const int NumSuccs = static_cast<int>(succ_size(&BB));
+      if (NumSuccs > CurrentSuccMax) {
+        CurrentSuccMax = NumSuccs;
+        MaaxSucc = BB.getName();
+      }
+    }
+
+    // Add the global that receives the block numbers.
+    IRBuilder<> Builder(F.getContext());
+    GlobalVariable *GV = new GlobalVariable(
+        *F.getParent(), Builder.getInt32Ty(), /*isConstant=*/false,
+        GlobalValue::InternalLinkage, Builder.getInt32(0), "G");
+
+    // Store the sequence number 0, 1, 2, ... at the top of each block. The
+    // store goes to the first insertion point so that it never lands in
+    // front of a PHI node.
+    int Counter = 0;
+    for (BasicBlock &BB : F) {
+      Builder.SetInsertPoint(&BB, BB.getFirstInsertionPt());
+      Builder.CreateStore(Builder.getInt32(Counter), GV);
+      ++Counter;
+    }
+
+    errs() << "Total no of instruction in a Function: " << Count << "\n";
+    errs() << "Total no of BBs in a Function: " << CountBb << "\n";
+    errs() << "BasicBlock with max instructions: " << Maax << "->" << CurrentMax
+           << " instructions"
+           << "\n";
+    errs() << "BasicBlock with max predecessors: " << MaaxPred << "->"
+           << CurrentPredMax << " predecessors "
+           << "\n";
+    errs() << "BasicBlock with max successors: " << MaaxSucc << "->"
+           << CurrentSuccMax << " successors "
+           << "\n";
+    return true;
+  }
+};
+} // namespace
+
+char MyHello::ID = 0;
+static RegisterPass<MyHello> Z("myhello", "Hello World Pass");
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9569308eb4a637..588ebd7a8fda17 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory(cfcss)
+add_subdirectory(assignment1)
diff --git a/tests/assignment1/1.c b/tests/assignment1/1.c
new file mode 100644
index 00000000000000..a5dc65529c0a2c
--- /dev/null
+++ b/tests/assignment1/1.c
@@ -0,0 +1,6 @@
+int max(int x, int y) { // Returns the larger of x and y; the if/else gives the myhello pass a small branching CFG to inspect.
+ int k;
+ if (x > y) k=x;
+ else k = y;
+ return k;
+}
diff --git a/tests/assignment1/1.ll b/tests/assignment1/1.ll
new file mode 100644
index 00000000000000..4ba7cf0aac5aaa
--- /dev/null
+++ b/tests/assignment1/1.ll
@@ -0,0 +1,42 @@
+; ModuleID = '1.c'
+source_filename = "1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @max(i32 noundef %x, i32 noundef %y) #0 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %k = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %1 = load i32, i32* %y.addr, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %2 = load i32, i32* %x.addr, align 4
+ store i32 %2, i32* %k, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ %3 = load i32, i32* %y.addr, align 4
+ store i32 %3, i32* %k, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %4 = load i32, i32* %k, align 4
+ ret i32 %4
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git cfd62625658626c24e9549fa5c6e07aadfe2d792)"}
diff --git a/tests/assignment1/2.ll b/tests/assignment1/2.ll
new file mode 100644
index 00000000000000..3f559ba706916d
--- /dev/null
+++ b/tests/assignment1/2.ll
@@ -0,0 +1,48 @@
+; ModuleID = '1.ll'
+source_filename = "1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@G = internal global i32 0
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @max(i32 noundef %x, i32 noundef %y) #0 {
+entry:
+ store i32 0, i32* @G, align 4
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %k = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %1 = load i32, i32* %y.addr, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ store i32 1, i32* @G, align 4
+ %2 = load i32, i32* %x.addr, align 4
+ store i32 %2, i32* %k, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ store i32 2, i32* @G, align 4
+ %3 = load i32, i32* %y.addr, align 4
+ store i32 %3, i32* %k, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ store i32 3, i32* @G, align 4
+ %4 = load i32, i32* %k, align 4
+ ret i32 %4
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git cfd62625658626c24e9549fa5c6e07aadfe2d792)"}
diff --git a/tests/assignment1/command.sh b/tests/assignment1/command.sh
new file mode 100644
index 00000000000000..210ea0c932dfba
--- /dev/null
+++ b/tests/assignment1/command.sh
@@ -0,0 +1,2 @@
+# Compile 1.c to textual LLVM IR (1.ll) at -O0.
+clang -O0 -S -emit-llvm 1.c -o 1.ll
+# Load the LLVMHello plugin and run its -myhello pass on 1.ll with the new
+# pass manager disabled (-enable-new-pm=0); the result is written to 2.ll.
+opt -load ${LLVM_HOME}/build/lib/LLVMHello.so -enable-new-pm=0 -myhello 1.ll -S -o 2.ll
>From 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8 Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Fri, 1 Jul 2022 09:53:46 +0300
Subject: [PATCH 3/8] Adding scev pass
---
llvm/lib/Transforms/CMakeLists.txt | 1 +
llvm/lib/Transforms/Scev/CMakeLists.txt | 20 ++++
llvm/lib/Transforms/Scev/Scev.cpp | 110 ++++++++++++++++++++++
llvm/lib/Transforms/Scev/Scev.exports | 0
tests/CMakeLists.txt | 1 +
tests/scev/command.sh | 8 ++
tests/scev/out.ll | 83 +++++++++++++++++
tests/scev/out1.ll | 83 +++++++++++++++++
tests/scev/scev.c | 31 +++++++
tests/scev/scev.ll | 116 ++++++++++++++++++++++++
10 files changed, 453 insertions(+)
create mode 100644 llvm/lib/Transforms/Scev/CMakeLists.txt
create mode 100644 llvm/lib/Transforms/Scev/Scev.cpp
create mode 100644 llvm/lib/Transforms/Scev/Scev.exports
create mode 100644 tests/scev/command.sh
create mode 100644 tests/scev/out.ll
create mode 100644 tests/scev/out1.ll
create mode 100644 tests/scev/scev.c
create mode 100644 tests/scev/scev.ll
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 5ed9ca62265fe8..8ace411e1ca82b 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -10,3 +10,4 @@ add_subdirectory(ObjCARC)
add_subdirectory(Coroutines)
add_subdirectory(CFGuard)
add_subdirectory(Cfcss)
+add_subdirectory(Scev)
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Scev/CMakeLists.txt b/llvm/lib/Transforms/Scev/CMakeLists.txt
new file mode 100644
index 00000000000000..b5a4d0ea4c7569
--- /dev/null
+++ b/llvm/lib/Transforms/Scev/CMakeLists.txt
@@ -0,0 +1,20 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the Scev plugin.
+if( NOT LLVM_REQUIRES_RTTI )
+ if( NOT LLVM_REQUIRES_EH )
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Scev.exports)
+ endif()
+endif()
+
+if(WIN32 OR CYGWIN)
+ set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
+add_llvm_library( LLVMScev MODULE BUILDTREE_ONLY
+ Scev.cpp
+
+ DEPENDS
+ intrinsics_gen
+ PLUGIN_TOOL
+ opt
+ )
diff --git a/llvm/lib/Transforms/Scev/Scev.cpp b/llvm/lib/Transforms/Scev/Scev.cpp
new file mode 100644
index 00000000000000..3c7aaaf05c24a8
--- /dev/null
+++ b/llvm/lib/Transforms/Scev/Scev.cpp
@@ -0,0 +1,110 @@
+/*===- Scev.cpp -Creates and Simplifies Recurrences for ‘Expressions involving
+Induction Variables’ Algorithm:
+1. Get ScalarEvolution object.
+2. Use getSCEV for the pointer operands
+3. Take the scev pointer base
+4. Subtract scev with scev pointer base to get the SCEVAddRecExpr(DiffVal).
+eg:{8,+,16}<nuw><nsw><%for.cond>
+5. This SCEVAddRecExpr will contain the required indices and Extract it. eg : 8
+6. Store the index and corresponding Store instruction in StoreInsts map.
+7. Sorting the Offset vector values.
+8. Get the BB of store instruction and using that get the terminator
+instruction.
+9. Move all store instructions one by one before terminator instruction.
+===-------------------------------------------------------------------------------------------===*/
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "hello"
+
+namespace {
+/// Scev - Reorders GEP-addressed stores by the constant start offset that
+/// ScalarEvolution reports for their addresses.
+///
+/// For each store whose pointer operand is a GetElementPtrInst, the SCEV of
+/// the address minus its pointer base is computed. When that difference is an
+/// add-recurrence with a constant start (e.g. {8,+,16}<%for.cond> starts at
+/// 8), the store is recorded under that start value. The recorded stores are
+/// then moved, in ascending start order, in front of the terminator of the
+/// block that holds the lowest-offset store.
+struct Scev : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  Scev() : FunctionPass(ID) {}
+
+  /// Collects the qualifying stores of \p F and moves them into offset order.
+  /// \returns true when stores were moved (the IR changed), false when no
+  /// store qualified and the function was left untouched.
+  bool runOnFunction(Function &F) override {
+    auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+    // Base pointer of the first qualifying address; stores whose address has
+    // a different base are ignored.
+    const SCEV *FirstBase = nullptr;
+    // Start offsets of the recorded stores, and the store recorded for each.
+    // When two stores share an offset the later one replaces the earlier one.
+    SmallVector<int> OffSet;
+    DenseMap<int, Instruction *> StoreInsts;
+
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        auto *Store = dyn_cast<StoreInst>(&I);
+        if (!Store)
+          continue;
+        auto *Gep = dyn_cast<GetElementPtrInst>(Store->getPointerOperand());
+        if (!Gep)
+          continue;
+
+        const SCEV *Addr = SE.getSCEV(Gep);
+        const SCEV *Base = SE.getPointerBase(Addr);
+        if (!Base)
+          continue;
+        if (!FirstBase) {
+          FirstBase = Base;
+        } else if (FirstBase != Base) {
+          LLVM_DEBUG(dbgs()
+                     << "\nBasePointers are not same, stopping the pass");
+          continue;
+        }
+
+        // Address relative to its base, e.g. {8,+,16}<%for.cond>.
+        const auto *AddRec =
+            dyn_cast<SCEVAddRecExpr>(SE.getMinusSCEV(Addr, Base));
+        if (!AddRec)
+          continue;
+        // Only a constant start yields a sort key. Skip anything else rather
+        // than filing the store under a key left over from an earlier store.
+        const auto *Start = dyn_cast<SCEVConstant>(AddRec->getStart());
+        if (!Start)
+          continue;
+
+        int Key = Start->getValue()->getSExtValue();
+        OffSet.push_back(Key);
+        StoreInsts[Key] = &I;
+      }
+    }
+
+    // Nothing qualified: there is no store to take a block from, and the
+    // function has not been changed.
+    if (OffSet.empty())
+      return false;
+
+    std::sort(OffSet.begin(), OffSet.end());
+
+    // All recorded stores are placed before the terminator of the block that
+    // contains the lowest-offset store.
+    // NOTE(review): stores are collected from every block of the function, so
+    // stores from other blocks are moved into this one as well - confirm that
+    // this is intended.
+    Instruction *LastInst =
+        StoreInsts[OffSet.front()]->getParent()->getTerminator();
+    for (int Key : OffSet)
+      StoreInsts[Key]->moveBefore(LastInst);
+
+    // Instructions were repositioned, so report the function as modified.
+    return true;
+  }
+
+  // The pass moves store instructions, so it cannot claim to preserve every
+  // analysis. It only repositions existing instructions and never adds or
+  // removes blocks or branches, so the CFG is preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+  }
+};
+} // namespace
+
+char Scev::ID = 0;
+static RegisterPass<Scev>
+    X("scev", "Scev Implementation Pass (with getAnalysisUsage implemented)");
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Scev/Scev.exports b/llvm/lib/Transforms/Scev/Scev.exports
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 588ebd7a8fda17..0ab0d081f586b9 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory(cfcss)
add_subdirectory(assignment1)
+add_subdirectory(scev)
diff --git a/tests/scev/command.sh b/tests/scev/command.sh
new file mode 100644
index 00000000000000..7eca24760d208a
--- /dev/null
+++ b/tests/scev/command.sh
@@ -0,0 +1,8 @@
+# first command is to emit ir for test case
+clang -S -emit-llvm scev.c -Xclang -disable-O0-optnone
+
+#second command is to clean up ir so that scev can understand it
+opt -mem2reg -loop-simplify -instcombine -instnamer -indvars scev.ll -S -o out.ll
+
+#Third command will run scev
+opt -load ${LLVM_HOME}/build/lib/LLVMScev.so -scev out.ll -enable-new-pm=0 -S -o out1.ll
\ No newline at end of file
diff --git a/tests/scev/out.ll b/tests/scev/out.ll
new file mode 100644
index 00000000000000..d172d379a14edf
--- /dev/null
+++ b/tests/scev/out.ll
@@ -0,0 +1,83 @@
+; ModuleID = 'scev.ll'
+source_filename = "scev.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Z = dso_local global i32 5, align 4
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [10 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 16
+ %2 = add nuw nsw i64 %indvars.iv, 11
+ %3 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %3
+ %4 = trunc i64 %2 to i32
+ store i32 %4, i32* %arrayidx5, align 4
+ %5 = add nuw nsw i64 %indvars.iv, 12
+ %6 = or i64 %indvars.iv, 2
+ %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %6
+ %7 = trunc i64 %5 to i32
+ store i32 %7, i32* %arrayidx10, align 8
+ %8 = add nuw nsw i64 %indvars.iv, 13
+ %9 = or i64 %indvars.iv, 3
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %9
+ %10 = trunc i64 %8 to i32
+ store i32 %10, i32* %arrayidx15, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc22, %for.end
+ %indvars.iv9 = phi i64 [ %indvars.iv.next10, %for.inc22 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv9, 5
+ br i1 %exitcond, label %for.body19, label %for.end23
+
+for.body19: ; preds = %for.cond17
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv9
+ %i = load i32, i32* %arrayidx21, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32 noundef %i) #2
+ br label %for.inc22
+
+for.inc22: ; preds = %for.body19
+ %indvars.iv.next10 = add nuw nsw i64 %indvars.iv9, 1
+ br label %for.cond17, !llvm.loop !6
+
+for.end23: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/out1.ll b/tests/scev/out1.ll
new file mode 100644
index 00000000000000..7afff529e3a892
--- /dev/null
+++ b/tests/scev/out1.ll
@@ -0,0 +1,83 @@
+; ModuleID = 'out.ll'
+source_filename = "scev.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Z = dso_local global i32 5, align 4
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [10 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ %2 = add nuw nsw i64 %indvars.iv, 11
+ %3 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %3
+ %4 = trunc i64 %2 to i32
+ %5 = add nuw nsw i64 %indvars.iv, 12
+ %6 = or i64 %indvars.iv, 2
+ %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %6
+ %7 = trunc i64 %5 to i32
+ %8 = add nuw nsw i64 %indvars.iv, 13
+ %9 = or i64 %indvars.iv, 3
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %9
+ %10 = trunc i64 %8 to i32
+ store i32 %1, i32* %arrayidx, align 16
+ store i32 %4, i32* %arrayidx5, align 4
+ store i32 %7, i32* %arrayidx10, align 8
+ store i32 %10, i32* %arrayidx15, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc22, %for.end
+ %indvars.iv9 = phi i64 [ %indvars.iv.next10, %for.inc22 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv9, 5
+ br i1 %exitcond, label %for.body19, label %for.end23
+
+for.body19: ; preds = %for.cond17
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv9
+ %i = load i32, i32* %arrayidx21, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32 noundef %i) #2
+ br label %for.inc22
+
+for.inc22: ; preds = %for.body19
+ %indvars.iv.next10 = add nuw nsw i64 %indvars.iv9, 1
+ br label %for.cond17, !llvm.loop !6
+
+for.end23: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev.c b/tests/scev/scev.c
new file mode 100644
index 00000000000000..2b9fb9aba8b2df
--- /dev/null
+++ b/tests/scev/scev.c
@@ -0,0 +1,31 @@
+#include<stdio.h>
+#include<stdlib.h>
+int Z=5;
+// void print(int *A,int *B)
+// {
+// printf("A=%d, B=%d", *A,*B);
+// }
+
+/*
+ * Test input for the Scev pass. Each iteration of the first loop performs
+ * four stores to A at indices I, I+1, I+2 and I+3, with I advancing by 4
+ * while I < N. The second loop prints A[0] through A[N-1].
+ */
+int main()
+{
+
+ int C = 10,A[10],N=5,B[12];
+ for(int I = 0; I < N; I += 4) {
+ // A[I+2] = C+I+2;
+ // A[I+1] = C+I+1;
+
+
+ // A[I+3] = C+I+3;
+ // A[I] = C+I;
+ // Active variant: the four stores in ascending index order. The
+ // commented-out lines above are the same stores in a shuffled order.
+ A[I] = C+I;
+ A[I+1] = C+I+1;
+ A[I+2] = C+I+2;
+ A[I+3] = C+I+3;
+
+}
+for (int i=0; i<N;i++) {
+ printf("%d", A[i] );
+}
+// print(A,B);
+return 0;
+}
diff --git a/tests/scev/scev.ll b/tests/scev/scev.ll
new file mode 100644
index 00000000000000..2054e81e975ddb
--- /dev/null
+++ b/tests/scev/scev.ll
@@ -0,0 +1,116 @@
+; ModuleID = 'scev.c'
+source_filename = "scev.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Z = dso_local global i32 5, align 4
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %C = alloca i32, align 4
+ %A = alloca [10 x i32], align 16
+ %N = alloca i32, align 4
+ %B = alloca [12 x i32], align 16
+ %I = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ store i32 10, i32* %C, align 4
+ store i32 5, i32* %N, align 4
+ store i32 0, i32* %I, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %I, align 4
+ %1 = load i32, i32* %N, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %C, align 4
+ %3 = load i32, i32* %I, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32, i32* %I, align 4
+ %idxprom = sext i32 %4 to i64
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %5 = load i32, i32* %C, align 4
+ %6 = load i32, i32* %I, align 4
+ %add1 = add nsw i32 %5, %6
+ %add2 = add nsw i32 %add1, 1
+ %7 = load i32, i32* %I, align 4
+ %add3 = add nsw i32 %7, 1
+ %idxprom4 = sext i32 %add3 to i64
+ %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom4
+ store i32 %add2, i32* %arrayidx5, align 4
+ %8 = load i32, i32* %C, align 4
+ %9 = load i32, i32* %I, align 4
+ %add6 = add nsw i32 %8, %9
+ %add7 = add nsw i32 %add6, 2
+ %10 = load i32, i32* %I, align 4
+ %add8 = add nsw i32 %10, 2
+ %idxprom9 = sext i32 %add8 to i64
+ %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom9
+ store i32 %add7, i32* %arrayidx10, align 4
+ %11 = load i32, i32* %C, align 4
+ %12 = load i32, i32* %I, align 4
+ %add11 = add nsw i32 %11, %12
+ %add12 = add nsw i32 %add11, 3
+ %13 = load i32, i32* %I, align 4
+ %add13 = add nsw i32 %13, 3
+ %idxprom14 = sext i32 %add13 to i64
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom14
+ store i32 %add12, i32* %arrayidx15, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %14 = load i32, i32* %I, align 4
+ %add16 = add nsw i32 %14, 4
+ store i32 %add16, i32* %I, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i, align 4
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc22, %for.end
+ %15 = load i32, i32* %i, align 4
+ %16 = load i32, i32* %N, align 4
+ %cmp18 = icmp slt i32 %15, %16
+ br i1 %cmp18, label %for.body19, label %for.end23
+
+for.body19: ; preds = %for.cond17
+ %17 = load i32, i32* %i, align 4
+ %idxprom20 = sext i32 %17 to i64
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom20
+ %18 = load i32, i32* %arrayidx21, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32 noundef %18)
+ br label %for.inc22
+
+for.inc22: ; preds = %for.body19
+ %19 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %19, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond17, !llvm.loop !6
+
+for.end23: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
>From 7f049514ee22563de5f8817412efd6d7d83109cf Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Fri, 1 Jul 2022 09:53:46 +0300
Subject: [PATCH 4/8] Adding scev pass
---
llvm/lib/Transforms/Scev/Scev.cpp | 71 ++++++++++------
tests/scev/out.ll | 40 ++++-----
tests/scev/out1.ll | 36 ++++-----
tests/scev/scev.c | 14 ++--
tests/scev/scev.ll | 40 ++++-----
tests/scev/scev1.c | 19 +++++
tests/scev/scev1.ll | 120 +++++++++++++++++++++++++++
tests/scev/scev1_1.ll | 86 ++++++++++++++++++++
tests/scev/scev1_2.ll | 86 ++++++++++++++++++++
tests/scev/scev_negative.c | 19 +++++
tests/scev/scev_negative.ll | 130 ++++++++++++++++++++++++++++++
tests/scev/scev_negative_out.ll | 92 +++++++++++++++++++++
tests/scev/scev_negative_out1.ll | 92 +++++++++++++++++++++
13 files changed, 754 insertions(+), 91 deletions(-)
create mode 100644 tests/scev/scev1.c
create mode 100644 tests/scev/scev1.ll
create mode 100644 tests/scev/scev1_1.ll
create mode 100644 tests/scev/scev1_2.ll
create mode 100644 tests/scev/scev_negative.c
create mode 100644 tests/scev/scev_negative.ll
create mode 100644 tests/scev/scev_negative_out.ll
create mode 100644 tests/scev/scev_negative_out1.ll
diff --git a/llvm/lib/Transforms/Scev/Scev.cpp b/llvm/lib/Transforms/Scev/Scev.cpp
index 3c7aaaf05c24a8..7f004a6a971df6 100644
--- a/llvm/lib/Transforms/Scev/Scev.cpp
+++ b/llvm/lib/Transforms/Scev/Scev.cpp
@@ -1,8 +1,8 @@
/*===- Scev.cpp -Creates and Simplifies Recurrences for ‘Expressions involving
Induction Variables’ Algorithm:
1. Get ScalarEvolution object.
-2. Use getSCEV for the pointer operands
-3. Take the scev pointer base
+2. Use getSCEV for the pointer operands.
+3. Take the scev pointer base.
4. Subtract scev with scev pointer base to get the SCEVAddRecExpr(DiffVal).
eg:{8,+,16}<nuw><nsw><%for.cond>
5. This SCEVAddRecExpr will contain the required indices and Extract it. eg : 8
@@ -10,10 +10,11 @@ eg:{8,+,16}<nuw><nsw><%for.cond>
7. Sorting the Offset vector values.
8. Get the BB of store instruction and using that get the terminator
instruction.
-9. Move all store instructions one by one before terminator instruction.
+9. Move all store instructions and corresponding operands one by one before terminator instruction.
===-------------------------------------------------------------------------------------------===*/
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -44,35 +45,40 @@ struct Scev : public FunctionPass {
SCEV *ScevVal, *BasePtr, *DiffVal, *GetEle, *TempPtr = nullptr;
SmallVector<int> OffSet;
llvm::DenseMap<int, Instruction *> StoreInsts;
+ //llvm::DenseMap<SCEV *, SmallVector <Instruction *> > BasePtrMap;
+ Instruction *Inst;
int Value = 0;
// Store the index and corresponding Store instruction in StoreInsts map
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
- if (auto *Store = dyn_cast<StoreInst>(&I)) {
- if (auto *Gep =
- dyn_cast<GetElementPtrInst>(Store->getPointerOperand())) {
- ScevVal = const_cast<SCEV *>(SE.getSCEV(Gep));
- if ((BasePtr = const_cast<SCEV *>(SE.getPointerBase(ScevVal)))) {
- if (TempPtr == nullptr)
- TempPtr = BasePtr;
- else if (TempPtr != BasePtr) {
- LLVM_DEBUG(dbgs()
- << "\nBasePointers are not same, stopping the pass");
- continue;
- }
- DiffVal = const_cast<SCEV *>(SE.getMinusSCEV(ScevVal, BasePtr));
- // Get the index of scev
- if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(DiffVal)) {
- if ((GetEle = const_cast<SCEV *>(AddRec->getStart()))) {
- if (SCEVConstant *BConst = dyn_cast<SCEVConstant>(GetEle)) {
- ConstantInt *CI = BConst->getValue();
- Value = CI->getSExtValue();
- }
- OffSet.push_back(Value);
- StoreInsts[Value] = &I;
+ if (!isa<StoreInst>(I)) {
+ continue;
+ }
+ auto *Store = dyn_cast<StoreInst>(&I);
+ if (auto *Gep =
+ dyn_cast<GetElementPtrInst>(Store->getPointerOperand())) {
+ ScevVal = const_cast<SCEV *>(SE.getSCEV(Gep));
+ if ((BasePtr = const_cast<SCEV *>(SE.getPointerBase(ScevVal)))) {
+ // BasePtrMap[BasePtr].push_back(Store);
+ if (TempPtr == nullptr)
+ TempPtr = BasePtr;
+ else if (TempPtr != BasePtr) {
+ LLVM_DEBUG(dbgs()
+ << "\nBasePointers are not same, stopping the pass");
+ continue;
+ }
+ DiffVal = const_cast<SCEV *>(SE.getMinusSCEV(ScevVal, BasePtr));
+ // Get the index of scev
+ if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(DiffVal)) {
+ if ((GetEle = const_cast<SCEV *>(AddRec->getStart()))) {
+ if (SCEVConstant *BConst = dyn_cast<SCEVConstant>(GetEle)) {
+ ConstantInt *CI = BConst->getValue();
+ Value = CI->getSExtValue();
}
+ OffSet.push_back(Value);
+ StoreInsts[Value] = &I;
}
}
}
@@ -80,6 +86,14 @@ struct Scev : public FunctionPass {
}
}
+ // If the vector is already sorted, there is no use in continuing. Stop the pass.
+ if (std::is_sorted(OffSet.begin(), OffSet.end())) {
+ LLVM_DEBUG(
+ dbgs()
+ << "\nScave values are already in sorted order.Exiting the pass");
+ return true;
+ }
+
// Sorting the Offset vector values
std::sort(OffSet.begin(), OffSet.end());
@@ -88,10 +102,15 @@ struct Scev : public FunctionPass {
BasicBlock *StoreInstBB = StoreInsts[OffSet[0]]->getParent();
Instruction *LastInst = StoreInstBB->getTerminator();
- // Move all store instructions one by one before terminator instruction
+ // Move all store instructions and corresponding operands one by one before terminator instruction
if (OffSet.size() != 0) {
for (auto V = OffSet.begin(), E = OffSet.end(); V != E; V = V + 1) {
StoreInsts[*V]->moveBefore(LastInst);
+ for (Use &U : StoreInsts[*V]->operands()) {
+ llvm::Value *Val = U.get();
+ Inst = dyn_cast<Instruction>(Val);
+ Inst->moveBefore(StoreInsts[*V]);
+ }
}
}
return false;
diff --git a/tests/scev/out.ll b/tests/scev/out.ll
index d172d379a14edf..99bce0e775e908 100644
--- a/tests/scev/out.ll
+++ b/tests/scev/out.ll
@@ -18,25 +18,25 @@ for.cond: ; preds = %for.inc, %entry
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %0 = add nuw nsw i64 %indvars.iv, 10
- %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
- %1 = trunc i64 %0 to i32
- store i32 %1, i32* %arrayidx, align 16
- %2 = add nuw nsw i64 %indvars.iv, 11
- %3 = or i64 %indvars.iv, 1
- %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %3
- %4 = trunc i64 %2 to i32
- store i32 %4, i32* %arrayidx5, align 4
- %5 = add nuw nsw i64 %indvars.iv, 12
- %6 = or i64 %indvars.iv, 2
- %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %6
- %7 = trunc i64 %5 to i32
- store i32 %7, i32* %arrayidx10, align 8
- %8 = add nuw nsw i64 %indvars.iv, 13
- %9 = or i64 %indvars.iv, 3
- %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %9
- %10 = trunc i64 %8 to i32
- store i32 %10, i32* %arrayidx15, align 4
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %1
+ %2 = trunc i64 %0 to i32
+ store i32 %2, i32* %arrayidx, align 8
+ %3 = add nuw nsw i64 %indvars.iv, 11
+ %4 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %4
+ %5 = trunc i64 %3 to i32
+ store i32 %5, i32* %arrayidx7, align 4
+ %6 = add nuw nsw i64 %indvars.iv, 13
+ %7 = or i64 %indvars.iv, 3
+ %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %7
+ %8 = trunc i64 %6 to i32
+ store i32 %8, i32* %arrayidx12, align 4
+ %9 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ %10 = trunc i64 %9 to i32
+ store i32 %10, i32* %arrayidx15, align 16
br label %for.inc
for.inc: ; preds = %for.body
@@ -77,7 +77,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
diff --git a/tests/scev/out1.ll b/tests/scev/out1.ll
index 7afff529e3a892..c4e29e27ee4d43 100644
--- a/tests/scev/out1.ll
+++ b/tests/scev/out1.ll
@@ -18,25 +18,25 @@ for.cond: ; preds = %for.inc, %entry
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %0 = add nuw nsw i64 %indvars.iv, 10
- %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
- %1 = trunc i64 %0 to i32
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
%2 = add nuw nsw i64 %indvars.iv, 11
%3 = or i64 %indvars.iv, 1
- %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %3
- %4 = trunc i64 %2 to i32
- %5 = add nuw nsw i64 %indvars.iv, 12
- %6 = or i64 %indvars.iv, 2
- %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %6
- %7 = trunc i64 %5 to i32
- %8 = add nuw nsw i64 %indvars.iv, 13
- %9 = or i64 %indvars.iv, 3
- %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %9
- %10 = trunc i64 %8 to i32
- store i32 %1, i32* %arrayidx, align 16
- store i32 %4, i32* %arrayidx5, align 4
- store i32 %7, i32* %arrayidx10, align 8
- store i32 %10, i32* %arrayidx15, align 4
+ %4 = add nuw nsw i64 %indvars.iv, 13
+ %5 = or i64 %indvars.iv, 3
+ %6 = add nuw nsw i64 %indvars.iv, 10
+ %7 = trunc i64 %6 to i32
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ store i32 %7, i32* %arrayidx15, align 16
+ %8 = trunc i64 %2 to i32
+ %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %3
+ store i32 %8, i32* %arrayidx7, align 4
+ %9 = trunc i64 %0 to i32
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %1
+ store i32 %9, i32* %arrayidx, align 8
+ %10 = trunc i64 %4 to i32
+ %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %5
+ store i32 %10, i32* %arrayidx12, align 4
br label %for.inc
for.inc: ; preds = %for.body
@@ -77,7 +77,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev.c b/tests/scev/scev.c
index 2b9fb9aba8b2df..27247470576806 100644
--- a/tests/scev/scev.c
+++ b/tests/scev/scev.c
@@ -11,16 +11,16 @@ int main()
int C = 10,A[10],N=5,B[12];
for(int I = 0; I < N; I += 4) {
- // A[I+2] = C+I+2;
- // A[I+1] = C+I+1;
+ A[I+2] = C+I+2;
+ A[I+1] = C+I+1;
- // A[I+3] = C+I+3;
- // A[I] = C+I;
- A[I] = C+I;
- A[I+1] = C+I+1;
- A[I+2] = C+I+2;
A[I+3] = C+I+3;
+ A[I] = C+I;
+ // A[I] = C+I;
+ // A[I+1] = C+I+1;
+ // A[I+2] = C+I+2;
+ // A[I+3] = C+I+3;
}
for (int i=0; i<N;i++) {
diff --git a/tests/scev/scev.ll b/tests/scev/scev.ll
index 2054e81e975ddb..24b364067e5e5f 100644
--- a/tests/scev/scev.ll
+++ b/tests/scev/scev.ll
@@ -32,37 +32,37 @@ for.body: ; preds = %for.cond
%2 = load i32, i32* %C, align 4
%3 = load i32, i32* %I, align 4
%add = add nsw i32 %2, %3
+ %add1 = add nsw i32 %add, 2
%4 = load i32, i32* %I, align 4
- %idxprom = sext i32 %4 to i64
+ %add2 = add nsw i32 %4, 2
+ %idxprom = sext i32 %add2 to i64
%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom
- store i32 %add, i32* %arrayidx, align 4
+ store i32 %add1, i32* %arrayidx, align 4
%5 = load i32, i32* %C, align 4
%6 = load i32, i32* %I, align 4
- %add1 = add nsw i32 %5, %6
- %add2 = add nsw i32 %add1, 1
+ %add3 = add nsw i32 %5, %6
+ %add4 = add nsw i32 %add3, 1
%7 = load i32, i32* %I, align 4
- %add3 = add nsw i32 %7, 1
- %idxprom4 = sext i32 %add3 to i64
- %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom4
- store i32 %add2, i32* %arrayidx5, align 4
+ %add5 = add nsw i32 %7, 1
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom6
+ store i32 %add4, i32* %arrayidx7, align 4
%8 = load i32, i32* %C, align 4
%9 = load i32, i32* %I, align 4
- %add6 = add nsw i32 %8, %9
- %add7 = add nsw i32 %add6, 2
+ %add8 = add nsw i32 %8, %9
+ %add9 = add nsw i32 %add8, 3
%10 = load i32, i32* %I, align 4
- %add8 = add nsw i32 %10, 2
- %idxprom9 = sext i32 %add8 to i64
- %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom9
- store i32 %add7, i32* %arrayidx10, align 4
+ %add10 = add nsw i32 %10, 3
+ %idxprom11 = sext i32 %add10 to i64
+ %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom11
+ store i32 %add9, i32* %arrayidx12, align 4
%11 = load i32, i32* %C, align 4
%12 = load i32, i32* %I, align 4
- %add11 = add nsw i32 %11, %12
- %add12 = add nsw i32 %add11, 3
+ %add13 = add nsw i32 %11, %12
%13 = load i32, i32* %I, align 4
- %add13 = add nsw i32 %13, 3
- %idxprom14 = sext i32 %add13 to i64
+ %idxprom14 = sext i32 %13 to i64
%arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom14
- store i32 %add12, i32* %arrayidx15, align 4
+ store i32 %add13, i32* %arrayidx15, align 4
br label %for.inc
for.inc: ; preds = %for.body
@@ -110,7 +110,7 @@ attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protect
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev1.c b/tests/scev/scev1.c
new file mode 100644
index 00000000000000..68210a4cc2d8fd
--- /dev/null
+++ b/tests/scev/scev1.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+int Z = 5;
+
+int main() {
+ int A[100], B[100];
+ int N = 20;
+ int C = 10;
+ for (int I = 0; I < N; I += 4) {
+ A[I + 2] = C + I + 2;
+ B[I + 1] = C + I + 1;
+ A[I + 3] = C + I + 3;
+ A[I] = C + I;
+ }
+ for (int I = 0; I < N; I++) {
+ printf("%d%d", A[I], B[I]);
+ }
+ return 0;
+}
diff --git a/tests/scev/scev1.ll b/tests/scev/scev1.ll
new file mode 100644
index 00000000000000..1bcf5ba88bb206
--- /dev/null
+++ b/tests/scev/scev1.ll
@@ -0,0 +1,120 @@
+; ModuleID = 'scev1.c'
+source_filename = "scev1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [5 x i8] c"%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %A = alloca [100 x i32], align 16
+ %B = alloca [100 x i32], align 16
+ %N = alloca i32, align 4
+ %C = alloca i32, align 4
+ %I = alloca i32, align 4
+ %I17 = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ store i32 20, i32* %N, align 4
+ store i32 10, i32* %C, align 4
+ store i32 0, i32* %I, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %I, align 4
+ %1 = load i32, i32* %N, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %C, align 4
+ %3 = load i32, i32* %I, align 4
+ %add = add nsw i32 %2, %3
+ %add1 = add nsw i32 %add, 2
+ %4 = load i32, i32* %I, align 4
+ %add2 = add nsw i32 %4, 2
+ %idxprom = sext i32 %add2 to i64
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %idxprom
+ store i32 %add1, i32* %arrayidx, align 4
+ %5 = load i32, i32* %C, align 4
+ %6 = load i32, i32* %I, align 4
+ %add3 = add nsw i32 %5, %6
+ %add4 = add nsw i32 %add3, 1
+ %7 = load i32, i32* %I, align 4
+ %add5 = add nsw i32 %7, 1
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %idxprom6
+ store i32 %add4, i32* %arrayidx7, align 4
+ %8 = load i32, i32* %C, align 4
+ %9 = load i32, i32* %I, align 4
+ %add8 = add nsw i32 %8, %9
+ %add9 = add nsw i32 %add8, 3
+ %10 = load i32, i32* %I, align 4
+ %add10 = add nsw i32 %10, 3
+ %idxprom11 = sext i32 %add10 to i64
+ %arrayidx12 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %idxprom11
+ store i32 %add9, i32* %arrayidx12, align 4
+ %11 = load i32, i32* %C, align 4
+ %12 = load i32, i32* %I, align 4
+ %add13 = add nsw i32 %11, %12
+ %13 = load i32, i32* %I, align 4
+ %idxprom14 = sext i32 %13 to i64
+ %arrayidx15 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %idxprom14
+ store i32 %add13, i32* %arrayidx15, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %14 = load i32, i32* %I, align 4
+ %add16 = add nsw i32 %14, 4
+ store i32 %add16, i32* %I, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %I17, align 4
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc25, %for.end
+ %15 = load i32, i32* %I17, align 4
+ %16 = load i32, i32* %N, align 4
+ %cmp19 = icmp slt i32 %15, %16
+ br i1 %cmp19, label %for.body20, label %for.end26
+
+for.body20: ; preds = %for.cond18
+ %17 = load i32, i32* %I17, align 4
+ %idxprom21 = sext i32 %17 to i64
+ %arrayidx22 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %idxprom21
+ %18 = load i32, i32* %arrayidx22, align 4
+ %19 = load i32, i32* %I17, align 4
+ %idxprom23 = sext i32 %19 to i64
+ %arrayidx24 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %idxprom23
+ %20 = load i32, i32* %arrayidx24, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i32 noundef %18, i32 noundef %20)
+ br label %for.inc25
+
+for.inc25: ; preds = %for.body20
+ %21 = load i32, i32* %I17, align 4
+ %inc = add nsw i32 %21, 1
+ store i32 %inc, i32* %I17, align 4
+ br label %for.cond18, !llvm.loop !6
+
+for.end26: ; preds = %for.cond18
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev1_1.ll b/tests/scev/scev1_1.ll
new file mode 100644
index 00000000000000..1f3db930596144
--- /dev/null
+++ b/tests/scev/scev1_1.ll
@@ -0,0 +1,86 @@
+; ModuleID = 'scev1.ll'
+source_filename = "scev1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [5 x i8] c"%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [100 x i32], align 16
+ %B = alloca [100 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %1
+ %2 = trunc i64 %0 to i32
+ store i32 %2, i32* %arrayidx, align 8
+ %3 = add nuw nsw i64 %indvars.iv, 11
+ %4 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %4
+ %5 = trunc i64 %3 to i32
+ store i32 %5, i32* %arrayidx7, align 4
+ %6 = add nuw nsw i64 %indvars.iv, 13
+ %7 = or i64 %indvars.iv, 3
+ %arrayidx12 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %7
+ %8 = trunc i64 %6 to i32
+ store i32 %8, i32* %arrayidx12, align 4
+ %9 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx15 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %indvars.iv
+ %10 = trunc i64 %9 to i32
+ store i32 %10, i32* %arrayidx15, align 16
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc25, %for.end
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc25 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv11, 20
+ br i1 %exitcond, label %for.body20, label %for.end26
+
+for.body20: ; preds = %for.cond18
+ %arrayidx22 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %indvars.iv11
+ %i = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %indvars.iv11
+ %i1 = load i32, i32* %arrayidx24, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i32 noundef %i, i32 noundef %i1) #2
+ br label %for.inc25
+
+for.inc25: ; preds = %for.body20
+ %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+ br label %for.cond18, !llvm.loop !6
+
+for.end26: ; preds = %for.cond18
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev1_2.ll b/tests/scev/scev1_2.ll
new file mode 100644
index 00000000000000..b60e791c1ca765
--- /dev/null
+++ b/tests/scev/scev1_2.ll
@@ -0,0 +1,86 @@
+; ModuleID = 'scev1_1.ll'
+source_filename = "scev1.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [5 x i8] c"%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [100 x i32], align 16
+ %B = alloca [100 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 20
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %1
+ %2 = trunc i64 %0 to i32
+ %3 = add nuw nsw i64 %indvars.iv, 11
+ %4 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %4
+ %5 = trunc i64 %3 to i32
+ store i32 %5, i32* %arrayidx7, align 4
+ %6 = add nuw nsw i64 %indvars.iv, 13
+ %7 = or i64 %indvars.iv, 3
+ %arrayidx12 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %7
+ %8 = trunc i64 %6 to i32
+ %9 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx15 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %indvars.iv
+ %10 = trunc i64 %9 to i32
+ store i32 %10, i32* %arrayidx15, align 16
+ store i32 %2, i32* %arrayidx, align 8
+ store i32 %8, i32* %arrayidx12, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc25, %for.end
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc25 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv11, 20
+ br i1 %exitcond, label %for.body20, label %for.end26
+
+for.body20: ; preds = %for.cond18
+ %arrayidx22 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 0, i64 %indvars.iv11
+ %i = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds [100 x i32], [100 x i32]* %B, i64 0, i64 %indvars.iv11
+ %i1 = load i32, i32* %arrayidx24, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i32 noundef %i, i32 noundef %i1) #2
+ br label %for.inc25
+
+for.inc25: ; preds = %for.body20
+ %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+ br label %for.cond18, !llvm.loop !6
+
+for.end26: ; preds = %for.cond18
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 973cbf86551cd15a9cb85bbb0ad01f2406ccb62d)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev_negative.c b/tests/scev/scev_negative.c
new file mode 100644
index 00000000000000..b91f1f3f6b00ef
--- /dev/null
+++ b/tests/scev/scev_negative.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+int Z = 5;
+
+int main() {
+
+ int C = 10, A[10], N = 5, B[12], E[12], D[12];
+ for (int I = 0; I < N; I += 4) {
+ D[I + 2] = C + I + 2;
+ B[I + 1] = C + I + 1;
+ E[I + 3] = C + I + 3;
+ A[I] = C + I;
+ }
+ for (int i = 0; i < N; i++) {
+ printf("%d%d%d%d", A[i],B[i],E[i],D[i]);
+ }
+ // print(A,B);
+ return 0;
+}
diff --git a/tests/scev/scev_negative.ll b/tests/scev/scev_negative.ll
new file mode 100644
index 00000000000000..6af7bac6d26cc0
--- /dev/null
+++ b/tests/scev/scev_negative.ll
@@ -0,0 +1,130 @@
+; ModuleID = 'scev_negative.c'
+source_filename = "scev_negative.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [9 x i8] c"%d%d%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %C = alloca i32, align 4
+ %A = alloca [10 x i32], align 16
+ %N = alloca i32, align 4
+ %B = alloca [12 x i32], align 16
+ %E = alloca [12 x i32], align 16
+ %D = alloca [12 x i32], align 16
+ %I = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ store i32 10, i32* %C, align 4
+ store i32 5, i32* %N, align 4
+ store i32 0, i32* %I, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %I, align 4
+ %1 = load i32, i32* %N, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %C, align 4
+ %3 = load i32, i32* %I, align 4
+ %add = add nsw i32 %2, %3
+ %add1 = add nsw i32 %add, 2
+ %4 = load i32, i32* %I, align 4
+ %add2 = add nsw i32 %4, 2
+ %idxprom = sext i32 %add2 to i64
+ %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %idxprom
+ store i32 %add1, i32* %arrayidx, align 4
+ %5 = load i32, i32* %C, align 4
+ %6 = load i32, i32* %I, align 4
+ %add3 = add nsw i32 %5, %6
+ %add4 = add nsw i32 %add3, 1
+ %7 = load i32, i32* %I, align 4
+ %add5 = add nsw i32 %7, 1
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %idxprom6
+ store i32 %add4, i32* %arrayidx7, align 4
+ %8 = load i32, i32* %C, align 4
+ %9 = load i32, i32* %I, align 4
+ %add8 = add nsw i32 %8, %9
+ %add9 = add nsw i32 %add8, 3
+ %10 = load i32, i32* %I, align 4
+ %add10 = add nsw i32 %10, 3
+ %idxprom11 = sext i32 %add10 to i64
+ %arrayidx12 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %idxprom11
+ store i32 %add9, i32* %arrayidx12, align 4
+ %11 = load i32, i32* %C, align 4
+ %12 = load i32, i32* %I, align 4
+ %add13 = add nsw i32 %11, %12
+ %13 = load i32, i32* %I, align 4
+ %idxprom14 = sext i32 %13 to i64
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom14
+ store i32 %add13, i32* %arrayidx15, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %14 = load i32, i32* %I, align 4
+ %add16 = add nsw i32 %14, 4
+ store i32 %add16, i32* %I, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i, align 4
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc28, %for.end
+ %15 = load i32, i32* %i, align 4
+ %16 = load i32, i32* %N, align 4
+ %cmp18 = icmp slt i32 %15, %16
+ br i1 %cmp18, label %for.body19, label %for.end29
+
+for.body19: ; preds = %for.cond17
+ %17 = load i32, i32* %i, align 4
+ %idxprom20 = sext i32 %17 to i64
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %idxprom20
+ %18 = load i32, i32* %arrayidx21, align 4
+ %19 = load i32, i32* %i, align 4
+ %idxprom22 = sext i32 %19 to i64
+ %arrayidx23 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %idxprom22
+ %20 = load i32, i32* %arrayidx23, align 4
+ %21 = load i32, i32* %i, align 4
+ %idxprom24 = sext i32 %21 to i64
+ %arrayidx25 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %idxprom24
+ %22 = load i32, i32* %arrayidx25, align 4
+ %23 = load i32, i32* %i, align 4
+ %idxprom26 = sext i32 %23 to i64
+ %arrayidx27 = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %idxprom26
+ %24 = load i32, i32* %arrayidx27, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i32 noundef %18, i32 noundef %20, i32 noundef %22, i32 noundef %24)
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body19
+ %25 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %25, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond17, !llvm.loop !6
+
+for.end29: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev_negative_out.ll b/tests/scev/scev_negative_out.ll
new file mode 100644
index 00000000000000..83b1de26729d5a
--- /dev/null
+++ b/tests/scev/scev_negative_out.ll
@@ -0,0 +1,92 @@
+; ModuleID = 'scev_negative.ll'
+source_filename = "scev_negative.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [9 x i8] c"%d%d%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [10 x i32], align 16
+ %B = alloca [12 x i32], align 16
+ %E = alloca [12 x i32], align 16
+ %D = alloca [12 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %1
+ %2 = trunc i64 %0 to i32
+ store i32 %2, i32* %arrayidx, align 8
+ %3 = add nuw nsw i64 %indvars.iv, 11
+ %4 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %4
+ %5 = trunc i64 %3 to i32
+ store i32 %5, i32* %arrayidx7, align 4
+ %6 = add nuw nsw i64 %indvars.iv, 13
+ %7 = or i64 %indvars.iv, 3
+ %arrayidx12 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %7
+ %8 = trunc i64 %6 to i32
+ store i32 %8, i32* %arrayidx12, align 4
+ %9 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ %10 = trunc i64 %9 to i32
+ store i32 %10, i32* %arrayidx15, align 16
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc28, %for.end
+ %indvars.iv12 = phi i64 [ %indvars.iv.next13, %for.inc28 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv12, 5
+ br i1 %exitcond, label %for.body19, label %for.end29
+
+for.body19: ; preds = %for.cond17
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv12
+ %i = load i32, i32* %arrayidx21, align 4
+ %arrayidx23 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %indvars.iv12
+ %i1 = load i32, i32* %arrayidx23, align 4
+ %arrayidx25 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %indvars.iv12
+ %i2 = load i32, i32* %arrayidx25, align 4
+ %arrayidx27 = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %indvars.iv12
+ %i3 = load i32, i32* %arrayidx27, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i32 noundef %i, i32 noundef %i1, i32 noundef %i2, i32 noundef %i3) #2
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body19
+ %indvars.iv.next13 = add nuw nsw i64 %indvars.iv12, 1
+ br label %for.cond17, !llvm.loop !6
+
+for.end29: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/scev/scev_negative_out1.ll b/tests/scev/scev_negative_out1.ll
new file mode 100644
index 00000000000000..21214d9a6b89a4
--- /dev/null
+++ b/tests/scev/scev_negative_out1.ll
@@ -0,0 +1,92 @@
+; ModuleID = 'scev_negative_out.ll'
+source_filename = "scev_negative.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Z = dso_local global i32 5, align 4
+ at .str = private unnamed_addr constant [9 x i8] c"%d%d%d%d\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+ %A = alloca [10 x i32], align 16
+ %B = alloca [12 x i32], align 16
+ %E = alloca [12 x i32], align 16
+ %D = alloca [12 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp ult i64 %indvars.iv, 5
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = add nuw nsw i64 %indvars.iv, 12
+ %1 = or i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %1
+ %2 = trunc i64 %0 to i32
+ store i32 %2, i32* %arrayidx, align 8
+ %3 = add nuw nsw i64 %indvars.iv, 11
+ %4 = or i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %4
+ %5 = trunc i64 %3 to i32
+ store i32 %5, i32* %arrayidx7, align 4
+ %6 = add nuw nsw i64 %indvars.iv, 13
+ %7 = or i64 %indvars.iv, 3
+ %arrayidx12 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %7
+ %8 = trunc i64 %6 to i32
+ store i32 %8, i32* %arrayidx12, align 4
+ %9 = add nuw nsw i64 %indvars.iv, 10
+ %arrayidx15 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv
+ %10 = trunc i64 %9 to i32
+ store i32 %10, i32* %arrayidx15, align 16
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond17
+
+for.cond17: ; preds = %for.inc28, %for.end
+ %indvars.iv12 = phi i64 [ %indvars.iv.next13, %for.inc28 ], [ 0, %for.end ]
+ %exitcond = icmp ne i64 %indvars.iv12, 5
+ br i1 %exitcond, label %for.body19, label %for.end29
+
+for.body19: ; preds = %for.cond17
+ %arrayidx21 = getelementptr inbounds [10 x i32], [10 x i32]* %A, i64 0, i64 %indvars.iv12
+ %i = load i32, i32* %arrayidx21, align 4
+ %arrayidx23 = getelementptr inbounds [12 x i32], [12 x i32]* %B, i64 0, i64 %indvars.iv12
+ %i1 = load i32, i32* %arrayidx23, align 4
+ %arrayidx25 = getelementptr inbounds [12 x i32], [12 x i32]* %E, i64 0, i64 %indvars.iv12
+ %i2 = load i32, i32* %arrayidx25, align 4
+ %arrayidx27 = getelementptr inbounds [12 x i32], [12 x i32]* %D, i64 0, i64 %indvars.iv12
+ %i3 = load i32, i32* %arrayidx27, align 4
+ %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), i32 noundef %i, i32 noundef %i1, i32 noundef %i2, i32 noundef %i3) #2
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body19
+ %indvars.iv.next13 = add nuw nsw i64 %indvars.iv12, 1
+ br label %for.cond17, !llvm.loop !6
+
+for.end29: ; preds = %for.cond17
+ ret i32 0
+}
+
+declare dso_local i32 @printf(i8* noundef, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 8e78085d22f2ac489f95a76f7e2dcfb7d832e9b8)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
>From 47ee914ea16086c1958b93540ed2351bcdae7cdb Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Thu, 7 Jul 2022 10:09:06 +0300
Subject: [PATCH 5/8] Adding Loop Fusion pass
---
llvm/lib/Transforms/CMakeLists.txt | 3 +-
llvm/lib/Transforms/LoopFusion/CMakeLists.txt | 20 ++
llvm/lib/Transforms/LoopFusion/LoopFusion.cpp | 206 ++++++++++++++++++
.../Transforms/LoopFusion/LoopFusion.exports | 0
tests/CMakeLists.txt | 1 +
tests/loop_fuse/.init.dot | 16 ++
tests/loop_fuse/command.sh | 11 +
tests/loop_fuse/loop_fuse.c | 10 +
tests/loop_fuse/loop_fuse.ll | 103 +++++++++
tests/loop_fuse/loop_fuse_out.ll | 75 +++++++
tests/loop_fuse/loop_fuse_out1.ll | 61 ++++++
tests/loop_fuse/negative_loop_fuse.c | 10 +
tests/loop_fuse/negative_loop_fuse.ll | 103 +++++++++
tests/loop_fuse/negative_loop_fuse_out.ll | 76 +++++++
tests/loop_fuse/negative_loop_fuse_out1.ll | 76 +++++++
15 files changed, 770 insertions(+), 1 deletion(-)
create mode 100644 llvm/lib/Transforms/LoopFusion/CMakeLists.txt
create mode 100644 llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
create mode 100644 llvm/lib/Transforms/LoopFusion/LoopFusion.exports
create mode 100644 tests/loop_fuse/.init.dot
create mode 100644 tests/loop_fuse/command.sh
create mode 100644 tests/loop_fuse/loop_fuse.c
create mode 100644 tests/loop_fuse/loop_fuse.ll
create mode 100644 tests/loop_fuse/loop_fuse_out.ll
create mode 100644 tests/loop_fuse/loop_fuse_out1.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse.c
create mode 100644 tests/loop_fuse/negative_loop_fuse.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse_out.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse_out1.ll
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 8ace411e1ca82b..6b165fd71dfcfb 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -10,4 +10,5 @@ add_subdirectory(ObjCARC)
add_subdirectory(Coroutines)
add_subdirectory(CFGuard)
add_subdirectory(Cfcss)
-add_subdirectory(Scev)
\ No newline at end of file
+add_subdirectory(Scev)
+add_subdirectory(LoopFusion)
diff --git a/llvm/lib/Transforms/LoopFusion/CMakeLists.txt b/llvm/lib/Transforms/LoopFusion/CMakeLists.txt
new file mode 100644
index 00000000000000..6c0edac49c6ec2
--- /dev/null
+++ b/llvm/lib/Transforms/LoopFusion/CMakeLists.txt
@@ -0,0 +1,20 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the LoopFusion plugin, so point the build at the (empty)
+# LoopFusion.exports symbol file.
+if( NOT LLVM_REQUIRES_RTTI )
+ if( NOT LLVM_REQUIRES_EH )
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/LoopFusion.exports)
+ endif()
+endif()
+
+# On Windows/Cygwin the plugin lists the Core and Support components explicitly.
+if(WIN32 OR CYGWIN)
+ set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
+# Build LoopFusion.cpp as a loadable module (LLVMLoopFusion) that is kept in
+# the build tree only and is meant to be loaded into the opt tool.
+add_llvm_library( LLVMLoopFusion MODULE BUILDTREE_ONLY
+LoopFusion.cpp
+
+ DEPENDS
+ intrinsics_gen
+ PLUGIN_TOOL
+ opt
+ )
diff --git a/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
new file mode 100644
index 00000000000000..f82d6e597d858b
--- /dev/null
+++ b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
@@ -0,0 +1,206 @@
+/*===- LoopFusion.cpp -
+ This program is the implementation of a loop fusion pass for the LLVM compiler.
+Two loops that are adjacent and have the same condition and increments with
+respect to the loop variable may be fused, i.e., their bodies may be executed one
+after the other within a single loop. The decision to fuse the loops is taken
+based on the legality and profitability of the fusion. It should not be
+performed if the resulting code has an anti-dependency or if the execution time
+of the program increases. Algorithm:
+1. Check whether the 2 loops can be fused.
+2. Replace the uses of the induction variable of the 2nd loop with that of the 1st loop.
+3. Combine the bodies of loop1 and loop2.
+4. Set the successor of the 1st loop’s header to the exit block of the 2nd loop.
+5. Delete the unwanted basic blocks of the 2nd loop.
+===-------------------------------------------------------------------------------------------===*/
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+using namespace llvm;
+
+// Debug category of this pass ("hello" was a leftover from the Hello sample).
+#define DEBUG_TYPE "loopfusion"
+
+namespace {
+// LoopFusion - Legacy function pass that tries to fuse two adjacent top-level
+// loops of a function into one loop.
+//
+// The pass only handles loops of the shape
+//     header (conditional branch: body / exit) -> body -> latch -> header
+// that both have a canonical induction variable and provably the same trip
+// count. Anything else is rejected by fuseCheck().
+//
+// NOTE(review): no memory-dependence analysis is performed, so the pass does
+// not prove that interleaving the two bodies is legal. Callers must only run
+// it on inputs where that is known to hold.
+struct LoopFusion : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  LoopFusion() : FunctionPass(ID) {}
+
+  // Fuses the first two top-level loops reported by LoopInfo if fuseCheck()
+  // accepts them. Returns true when fusion was attempted (the IR may have
+  // changed), false when the function was left untouched.
+  bool runOnFunction(Function &F) override {
+    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    SmallVector<Loop *> LoopVector(LI.begin(), LI.end());
+
+    // Nothing to fuse unless there are at least two top-level loops; indexing
+    // LoopVector below would otherwise be out of bounds.
+    if (LoopVector.size() < 2)
+      return false;
+
+    // LoopVector[1] is treated as the first loop and LoopVector[0] as the
+    // second one (presumably LoopInfo lists top-level loops in reverse program
+    // order -- confirm against LoopInfo documentation).
+    if (!fuseCheck(LoopVector[1], LoopVector[0]))
+      return false;
+
+    fuseBody(LoopVector[1], LoopVector[0], F);
+    return true;
+  }
+
+  // Rewires the CFG so that Loop2's body runs inside Loop1:
+  //   body1 -> body2 -> latch1, and header1's exit edge goes to Loop2's exit.
+  // All uses of Loop2's canonical induction variable are replaced with
+  // Loop1's. Blocks that become unreachable are deleted afterwards.
+  // Does nothing if any required block/instruction is missing.
+  void fuseBody(Loop *Loop1, Loop *Loop2, Function &F) {
+    BasicBlock *Body1 = getBody(Loop1);
+    BasicBlock *Body2 = getBody(Loop2);
+    BasicBlock *Header1 = getHeader(Loop1);
+    BasicBlock *Latch1 = getLoopLatch(Loop1);
+    BasicBlock *Exit2 = getLoopExit(Loop2);
+    PHINode *Phi1 = Loop1->getCanonicalInductionVariable();
+    PHINode *Phi2 = Loop2->getCanonicalInductionVariable();
+
+    if (!Body1 || !Body2 || !Header1 || !Latch1 || !Exit2 || !Phi1 || !Phi2) {
+      llvm::errs() << "NULL Pointer encountered\n";
+      return;
+    }
+
+    auto *Body1Br = dyn_cast_or_null<BranchInst>(Body1->getTerminator());
+    auto *Body2Br = dyn_cast_or_null<BranchInst>(Body2->getTerminator());
+    auto *Header1Br = dyn_cast_or_null<BranchInst>(Header1->getTerminator());
+    // setSuccessor(1, ...) below needs a conditional branch, and
+    // replaceAllUsesWith() requires both induction variables to share a type.
+    if (!Body1Br || !Body2Br || !Header1Br || !Header1Br->isConditional() ||
+        Phi1->getType() != Phi2->getType())
+      return;
+
+    // Replace the use of induction variable of 2nd loop with that of 1st loop.
+    Phi2->replaceAllUsesWith(Phi1);
+
+    Body1Br->setSuccessor(0, Body2);   // body1 falls through into body2
+    Body2Br->setSuccessor(0, Latch1);  // body2 continues to the 1st loop's latch
+    Header1Br->setSuccessor(1, Exit2); // leaving loop 1 now skips loop 2
+
+    // Remove the now unreachable blocks of the 2nd loop.
+    EliminateUnreachableBlocks(F);
+  }
+
+  // Returns the first block of L that is neither its header nor a latch, or
+  // nullptr if there is none.
+  BasicBlock *getBody(Loop *L) {
+    BasicBlock *HeaderBlock = L->getHeader();
+    for (BasicBlock *BB : L->getBlocks()) {
+      if (BB != HeaderBlock && !L->isLoopLatch(BB))
+        return BB;
+    }
+    return nullptr;
+  }
+
+  // Returns the header block of L.
+  BasicBlock *getHeader(Loop *L) { return L->getHeader(); }
+
+  // Returns the first block of L that is a latch, or nullptr if there is none.
+  BasicBlock *getLoopLatch(Loop *L) {
+    for (BasicBlock *BB : L->getBlocks()) {
+      if (L->isLoopLatch(BB))
+        return BB;
+    }
+    return nullptr;
+  }
+
+  // Returns the exit block of L as reported by Loop::getExitBlock().
+  BasicBlock *getLoopExit(Loop *L) { return L->getExitBlock(); }
+
+  // Returns true if Loop1's exit block is Loop2's preheader. A missing exit
+  // block or preheader is reported and treated as "not adjacent".
+  bool adjacent(Loop *Loop1, Loop *Loop2) {
+    BasicBlock *Bb1 = Loop1->getExitBlock();
+    BasicBlock *Bb2 = Loop2->getLoopPreheader();
+
+    // Check for null before anything is dereferenced or compared; two null
+    // blocks must not count as adjacent.
+    if (Bb1 == nullptr || Bb2 == nullptr) {
+      llvm::errs() << "NULL Pointer encountered\n";
+      return false;
+    }
+
+    // The loops are only accepted when the exit block and the preheader are
+    // the very same block.
+    return Bb1 == Bb2;
+  }
+
+  // Returns true if L has exactly the header/body/latch shape fuseBody()
+  // rewires and owns a canonical induction variable.
+  bool hasFusableShape(Loop *L) {
+    if (L->getNumBlocks() != 3)
+      return false;
+
+    BasicBlock *Header = getHeader(L);
+    BasicBlock *Body = getBody(L);
+    BasicBlock *Latch = getLoopLatch(L);
+    BasicBlock *Exit = getLoopExit(L);
+    if (!Header || !Body || !Latch || !Exit)
+      return false;
+
+    // Header: conditional branch, successor 0 = body, successor 1 = exit.
+    auto *HeaderBr = dyn_cast_or_null<BranchInst>(Header->getTerminator());
+    if (!HeaderBr || !HeaderBr->isConditional() ||
+        HeaderBr->getSuccessor(0) != Body || HeaderBr->getSuccessor(1) != Exit)
+      return false;
+
+    // Body: unconditional branch to the latch.
+    auto *BodyBr = dyn_cast_or_null<BranchInst>(Body->getTerminator());
+    if (!BodyBr || !BodyBr->isUnconditional() ||
+        BodyBr->getSuccessor(0) != Latch)
+      return false;
+
+    // Latch: only goes back to the header.
+    if (Latch->getSingleSuccessor() != Header)
+      return false;
+
+    return L->getCanonicalInductionVariable() != nullptr;
+  }
+
+  // Returns true if redirecting the edges in fuseBody() cannot leave stale PHI
+  // entries or uses of values defined in the blocks that get deleted.
+  // Expects both loops to satisfy hasFusableShape().
+  bool isSafeToRewire(Loop *L1, Loop *L2) {
+    BasicBlock *Header1 = getHeader(L1);
+    BasicBlock *Latch1 = getLoopLatch(L1);
+    BasicBlock *Exit1 = getLoopExit(L1);
+    BasicBlock *Pre2 = L2->getLoopPreheader();
+    BasicBlock *Header2 = getHeader(L2);
+    BasicBlock *Body2 = getBody(L2);
+    BasicBlock *Latch2 = getLoopLatch(L2);
+    BasicBlock *Exit2 = getLoopExit(L2);
+    PHINode *Phi1 = L1->getCanonicalInductionVariable();
+    PHINode *Phi2 = L2->getCanonicalInductionVariable();
+    if (!Header1 || !Latch1 || !Exit1 || !Pre2 || !Header2 || !Body2 ||
+        !Latch2 || !Exit2 || !Phi1 || !Phi2)
+      return false;
+
+    if (Phi1->getType() != Phi2->getType())
+      return false;
+
+    // Loop 1's exit block must become unreachable once header1 is redirected.
+    if (Exit1->getSinglePredecessor() != Header1)
+      return false;
+
+    // These blocks get a different predecessor; PHI nodes in them would keep
+    // referring to the old one.
+    if (isa<PHINode>(Body2->front()) || isa<PHINode>(Latch1->front()) ||
+        isa<PHINode>(Exit2->front()))
+      return false;
+
+    // Values defined in the blocks that are deleted must not be used anywhere
+    // else. Phi2 itself is exempt because all its uses are rewritten to Phi1.
+    BasicBlock *Dead[] = {Pre2, Header2, Latch2};
+    for (BasicBlock *BB : Dead) {
+      for (Instruction &I : *BB) {
+        if (&I == Phi2)
+          continue;
+        for (User *U : I.users()) {
+          auto *UserInst = dyn_cast<Instruction>(U);
+          if (!UserInst || std::find(std::begin(Dead), std::end(Dead),
+                                     UserInst->getParent()) == std::end(Dead))
+            return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // Returns true if L1 (first loop) and L2 (second loop) may be fused by
+  // fuseBody(). Prints the reason to stderr when they may not.
+  bool fuseCheck(Loop *L1, Loop *L2) {
+
+    ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    // Check if the two loops are adjacent.
+    if (!adjacent(L1, L2)) {
+      llvm::errs() << "The two loops are not adjacent.CANNOT fuse\n";
+      return false;
+    }
+
+    // Check if the start integer is same. ConstantInt objects are uniqued per
+    // type and value, so pointer equality is sufficient; a non-constant start
+    // value (nullptr) is rejected.
+    ConstantInt *Start1 = startConstant(*L1);
+    ConstantInt *Start2 = startConstant(*L2);
+    if (!Start1 || !Start2 || Start1 != Start2) {
+      llvm::errs() << "The loop check starting value is not same.CANNOT fuse\n";
+      return false;
+    }
+
+    // Check if the limit integer is same.
+    Value *Limit1 = limitValue(L1);
+    Value *Limit2 = limitValue(L2);
+    if (!Limit1 || !Limit2 || Limit1 != Limit2) {
+      llvm::errs() << "The loop check limiting value is not same.CANNOT fuse\n";
+      return false;
+    }
+
+    // fuseBody() only knows how to rewire header/body/latch loops.
+    if (!hasFusableShape(L1) || !hasFusableShape(L2)) {
+      llvm::errs() << "The loops do not have the expected shape.CANNOT fuse\n";
+      return false;
+    }
+
+    // limitValue() is only a syntactic comparison; let ScalarEvolution confirm
+    // that both loops take their backedge the same number of times.
+    const SCEV *Count1 = SE->getBackedgeTakenCount(L1);
+    const SCEV *Count2 = SE->getBackedgeTakenCount(L2);
+    if (isa<SCEVCouldNotCompute>(Count1) || Count1 != Count2) {
+      llvm::errs() << "The loop trip counts are not provably same.CANNOT fuse\n";
+      return false;
+    }
+
+    if (!isSafeToRewire(L1, L2)) {
+      llvm::errs() << "The loops cannot be rewired safely.CANNOT fuse\n";
+      return false;
+    }
+    return true;
+  }
+
+  // Returns the constant that the first PHI node of LoopV's header receives
+  // from the loop preheader, or nullptr if there is no preheader, no PHI node,
+  // or the incoming value is not a ConstantInt.
+  ConstantInt *startConstant(Loop &LoopV) {
+    BasicBlock *Preheader = LoopV.getLoopPreheader();
+    if (!Preheader)
+      return nullptr;
+
+    auto Phis = LoopV.getHeader()->phis();
+    if (Phis.begin() == Phis.end())
+      return nullptr;
+
+    PHINode &IndVar = *Phis.begin();
+    if (IndVar.getBasicBlockIndex(Preheader) < 0)
+      return nullptr;
+    return dyn_cast<ConstantInt>(IndVar.getIncomingValueForBlock(Preheader));
+  }
+
+  // Returns the start value of LoopV as an int, or 0 when it is not a known
+  // constant. Because 0 is ambiguous, fuseCheck() uses startConstant()
+  // instead; this wrapper is kept for existing callers. SE is unused.
+  int startValue(Loop &LoopV, ScalarEvolution &SE) {
+    (void)SE;
+    if (ConstantInt *Start = startConstant(LoopV))
+      return Start->getSExtValue();
+    return {};
+  }
+
+  // Returns a value describing the loop bound, derived from the first non-PHI
+  // instruction of the header: for each of its non-PHI operands, the last
+  // operand of the defining instruction, or the operand itself when it is not
+  // an instruction (constant or argument). Returns nullptr if nothing was
+  // found.
+  Value *limitValue(Loop *LoopV) {
+    Instruction *Cond = LoopV->getHeader()->getFirstNonPHI();
+    if (!Cond)
+      return nullptr;
+
+    Value *End = nullptr;
+    for (Use &U : Cond->operands()) {
+      Value *Op = U.get();
+      if (isa<PHINode>(Op))
+        continue;
+      auto *I = dyn_cast<Instruction>(Op);
+      if (!I) {
+        // The bound is not computed by an instruction; use it directly.
+        End = Op;
+        continue;
+      }
+      for (Use &Inner : I->operands())
+        End = Inner.get();
+    }
+    return End;
+  }
+
+  // The pass rewrites the CFG when it fuses loops, so it must not claim to
+  // preserve any analysis; it only declares the analyses it needs.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
+};
+} // namespace
+
+// Definition of the pass identifier that LoopFusion's constructor hands to
+// FunctionPass.
+char LoopFusion::ID = 0;
+// Registers the pass under the command-line name "loopfusion" (run as
+// -loopfusion in tests/loop_fuse/command.sh) together with its description.
+static RegisterPass<LoopFusion>
+ X("loopfusion",
+ "LoopFusion Implementation Pass (with getAnalysisUsage implemented)");
\ No newline at end of file
diff --git a/llvm/lib/Transforms/LoopFusion/LoopFusion.exports b/llvm/lib/Transforms/LoopFusion/LoopFusion.exports
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 0ab0d081f586b9..2321ce7a8e9ce7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,3 +1,4 @@
add_subdirectory(cfcss)
add_subdirectory(assignment1)
add_subdirectory(scev)
+add_subdirectory(loop_fuse)
diff --git a/tests/loop_fuse/.init.dot b/tests/loop_fuse/.init.dot
new file mode 100644
index 00000000000000..cecd6d2978d441
--- /dev/null
+++ b/tests/loop_fuse/.init.dot
@@ -0,0 +1,16 @@
+digraph "CFG for 'init' function" {
+ label="CFG for 'init' function";
+
+ Node0x558a60718b60 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{entry:\l %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)\l %wide.trip.count = zext i32 %smax to i64\l br label %for.cond\l}"];
+ Node0x558a60718b60 -> Node0x558a607194d0;
+ Node0x558a607194d0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond: \l %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]\l %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count\l br i1 %exitcond, label %for.body, label %for.end16\l|{<s0>T|<s1>F}}"];
+ Node0x558a607194d0:s0 -> Node0x558a60719900;
+ Node0x558a607194d0:s1 -> Node0x558a607199e0;
+ Node0x558a60719900 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body: \l %0 = shl nuw nsw i64 %indvars.iv, 1\l %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %1 = trunc i64 %0 to i32\l store i32 %1, i32* %arrayidx, align 4\l %2 = trunc i64 %indvars.iv to i32\l %mul = mul nsw i32 %2, %2\l %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l store i32 %mul, i32* %arrayidx2, align 4\l br label %for.body6\l}"];
+ Node0x558a60719900 -> Node0x558a6071a270;
+ Node0x558a60719680 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l br label %for.cond, !llvm.loop !4\l}"];
+ Node0x558a60719680 -> Node0x558a607194d0;
+ Node0x558a6071a270 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body6: \l %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l %i = load i32, i32* %arrayidx8, align 4\l %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %i1 = load i32, i32* %arrayidx10, align 4\l %add11 = add nsw i32 %i, %i1\l %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv\l store i32 %add11, i32* %arrayidx13, align 4\l br label %for.inc\l}"];
+ Node0x558a6071a270 -> Node0x558a60719680;
+ Node0x558a607199e0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end16: \l ret void\l}"];
+}
diff --git a/tests/loop_fuse/command.sh b/tests/loop_fuse/command.sh
new file mode 100644
index 00000000000000..aac1e1e99a0ee5
--- /dev/null
+++ b/tests/loop_fuse/command.sh
@@ -0,0 +1,11 @@
+# first command is to emit ir for test case
+clang -S -emit-llvm loop_fuse.c -Xclang -disable-O0-optnone
+
+#second command is to clean up ir so that scev can understand it
+opt -mem2reg -loop-simplify -instcombine -instnamer -indvars loop_fuse.ll -S -o loop_fuse_out.ll
+
+#Third command will run loopfusion
+opt -load ${LLVM_HOME}/build/lib/LLVMLoopFusion.so -loopfusion loop_fuse_out.ll -enable-new-pm=0 -S -o loop_fuse_out1.ll
+
+#To create cfg
+opt -analyze -dot-cfg -enable-new-pm=0 loop_fuse_out1.ll
\ No newline at end of file
diff --git a/tests/loop_fuse/loop_fuse.c b/tests/loop_fuse/loop_fuse.c
new file mode 100644
index 00000000000000..e0189dd76ec488
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse.c
@@ -0,0 +1,10 @@
+/* Positive loop-fusion test input: two back-to-back loops that both run i
+ * from 0 up to n. The first fills c[i] and b[i]; the second reads b[i] and
+ * c[i] to fill a[i]. */
+void init(int *a, int *b, int *c, int n) {
+ for (int i = 0; i < n; i++) {
+ c[i] = i + i;
+ b[i] = i * i;
+ }
+
+ for (int i = 0; i < n; i++) {
+ a[i] = b[i] + c[i];
+ }
+}
\ No newline at end of file
diff --git a/tests/loop_fuse/loop_fuse.ll b/tests/loop_fuse/loop_fuse.ll
new file mode 100644
index 00000000000000..5bdecfd993a5e7
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse.ll
@@ -0,0 +1,103 @@
+; ModuleID = 'loop_fuse.c'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %a.addr = alloca i32*, align 8
+ %b.addr = alloca i32*, align 8
+ %c.addr = alloca i32*, align 8
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %i3 = alloca i32, align 4
+ store i32* %a, i32** %a.addr, align 8
+ store i32* %b, i32** %b.addr, align 8
+ store i32* %c, i32** %c.addr, align 8
+ store i32 %n, i32* %n.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32*, i32** %c.addr, align 8
+ %5 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %6 = load i32, i32* %i, align 4
+ %7 = load i32, i32* %i, align 4
+ %mul = mul nsw i32 %6, %7
+ %8 = load i32*, i32** %b.addr, align 8
+ %9 = load i32, i32* %i, align 4
+ %idxprom1 = sext i32 %9 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %8, i64 %idxprom1
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %10 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i3, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %11 = load i32, i32* %i3, align 4
+ %12 = load i32, i32* %n.addr, align 4
+ %cmp5 = icmp slt i32 %11, %12
+ br i1 %cmp5, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %13 = load i32*, i32** %b.addr, align 8
+ %14 = load i32, i32* %i3, align 4
+ %idxprom7 = sext i32 %14 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %13, i64 %idxprom7
+ %15 = load i32, i32* %arrayidx8, align 4
+ %16 = load i32*, i32** %c.addr, align 8
+ %17 = load i32, i32* %i3, align 4
+ %idxprom9 = sext i32 %17 to i64
+ %arrayidx10 = getelementptr inbounds i32, i32* %16, i64 %idxprom9
+ %18 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %15, %18
+ %19 = load i32*, i32** %a.addr, align 8
+ %20 = load i32, i32* %i3, align 4
+ %idxprom12 = sext i32 %20 to i64
+ %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %21 = load i32, i32* %i3, align 4
+ %inc15 = add nsw i32 %21, 1
+ store i32 %inc15, i32* %i3, align 4
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/loop_fuse_out.ll b/tests/loop_fuse/loop_fuse_out.ll
new file mode 100644
index 00000000000000..5f5cde757646c6
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse_out.ll
@@ -0,0 +1,75 @@
+; ModuleID = 'loop_fuse.ll'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %wide.trip.count7 = zext i32 %smax to i64
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 0, %for.end ]
+ %exitcond8 = icmp ne i64 %indvars.iv4, %wide.trip.count7
+ br i1 %exitcond8, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/loop_fuse_out1.ll b/tests/loop_fuse/loop_fuse_out1.ll
new file mode 100644
index 00000000000000..bd15857dfeb13a
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse_out1.ll
@@ -0,0 +1,61 @@
+; ModuleID = 'loop_fuse_out.ll'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end16
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.body6
+
+for.inc: ; preds = %for.body6
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.body6: ; preds = %for.body
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc
+
+for.end16: ; preds = %for.cond
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
diff --git a/tests/loop_fuse/negative_loop_fuse.c b/tests/loop_fuse/negative_loop_fuse.c
new file mode 100644
index 00000000000000..ca7a77bd3b3731
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse.c
@@ -0,0 +1,10 @@
+/* Negative loop-fusion test input: the two loops start at different values
+ * (i = 3 and i = 5), so their start values do not match. */
+void init(int *a, int *b, int *c, int n) {
+ for (int i = 3; i < n; i++) {
+ c[i] = i + i;
+ b[i] = i * i;
+ }
+
+ for (int i = 5; i < n; i++) {
+ a[i] = b[i] + c[i];
+ }
+}
\ No newline at end of file
diff --git a/tests/loop_fuse/negative_loop_fuse.ll b/tests/loop_fuse/negative_loop_fuse.ll
new file mode 100644
index 00000000000000..a5646e46627d58
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse.ll
@@ -0,0 +1,103 @@
+; ModuleID = 'negative_loop_fuse.c'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %a.addr = alloca i32*, align 8
+ %b.addr = alloca i32*, align 8
+ %c.addr = alloca i32*, align 8
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %i3 = alloca i32, align 4
+ store i32* %a, i32** %a.addr, align 8
+ store i32* %b, i32** %b.addr, align 8
+ store i32* %c, i32** %c.addr, align 8
+ store i32 %n, i32* %n.addr, align 4
+ store i32 3, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32*, i32** %c.addr, align 8
+ %5 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %6 = load i32, i32* %i, align 4
+ %7 = load i32, i32* %i, align 4
+ %mul = mul nsw i32 %6, %7
+ %8 = load i32*, i32** %b.addr, align 8
+ %9 = load i32, i32* %i, align 4
+ %idxprom1 = sext i32 %9 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %8, i64 %idxprom1
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %10 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 5, i32* %i3, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %11 = load i32, i32* %i3, align 4
+ %12 = load i32, i32* %n.addr, align 4
+ %cmp5 = icmp slt i32 %11, %12
+ br i1 %cmp5, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %13 = load i32*, i32** %b.addr, align 8
+ %14 = load i32, i32* %i3, align 4
+ %idxprom7 = sext i32 %14 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %13, i64 %idxprom7
+ %15 = load i32, i32* %arrayidx8, align 4
+ %16 = load i32*, i32** %c.addr, align 8
+ %17 = load i32, i32* %i3, align 4
+ %idxprom9 = sext i32 %17 to i64
+ %arrayidx10 = getelementptr inbounds i32, i32* %16, i64 %idxprom9
+ %18 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %15, %18
+ %19 = load i32*, i32** %a.addr, align 8
+ %20 = load i32, i32* %i3, align 4
+ %idxprom12 = sext i32 %20 to i64
+ %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %21 = load i32, i32* %i3, align 4
+ %inc15 = add nsw i32 %21, 1
+ store i32 %inc15, i32* %i3, align 4
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out.ll b/tests/loop_fuse/negative_loop_fuse_out.ll
new file mode 100644
index 00000000000000..100883a6e9d56e
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse_out.ll
@@ -0,0 +1,76 @@
+; ModuleID = 'negative_loop_fuse.ll'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 3)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 3, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %smax7 = call i32 @llvm.smax.i32(i32 %n, i32 5)
+ %wide.trip.count8 = zext i32 %smax7 to i64
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 5, %for.end ]
+ %exitcond9 = icmp ne i64 %indvars.iv4, %wide.trip.count8
+ br i1 %exitcond9, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out1.ll b/tests/loop_fuse/negative_loop_fuse_out1.ll
new file mode 100644
index 00000000000000..1eabb624ad1dc4
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse_out1.ll
@@ -0,0 +1,76 @@
+; ModuleID = 'negative_loop_fuse_out.ll'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 3)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 3, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %smax7 = call i32 @llvm.smax.i32(i32 %n, i32 5)
+ %wide.trip.count8 = zext i32 %smax7 to i64
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 5, %for.end ]
+ %exitcond9 = icmp ne i64 %indvars.iv4, %wide.trip.count8
+ br i1 %exitcond9, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
>From 777a30c4f97db458e06e228b49d8d5f0e4127197 Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Thu, 7 Jul 2022 10:09:06 +0300
Subject: [PATCH 6/8] Adding Loop Fusion pass
---
llvm/lib/Transforms/LoopFusion/LoopFusion.cpp | 124 ++++++++++--------
tests/loop_fuse/.init.dot | 42 ++++--
tests/loop_fuse/loop_fuse.c | 8 ++
tests/loop_fuse/loop_fuse.ll | 112 +++++++++++++---
tests/loop_fuse/loop_fuse_out.ll | 64 +++++++--
tests/loop_fuse/loop_fuse_out1.ll | 67 ++++++++--
tests/loop_fuse/negative_loop_fuse.ll | 2 +-
tests/loop_fuse/negative_loop_fuse_out.ll | 2 +-
tests/loop_fuse/negative_loop_fuse_out1.ll | 2 +-
9 files changed, 315 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
index f82d6e597d858b..d3890e8c00009b 100644
--- a/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
+++ b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
@@ -52,26 +52,39 @@ struct LoopFusion : public FunctionPass {
LoopVector.push_back(L);
}
- // Function to perform basic checks on the two loops
- if (fuseCheck(LoopVector[1], LoopVector[0]))
- // Function to perform fusing on the two loops
- fuseBody(LoopVector[1], LoopVector[0], F);
+ int LoopCount = LoopVector.size();
+ if (LoopCount < 2) {
+ llvm::errs() << "The program contains less no of loops to fuse\n";
+ return false;
+ }
+
+ // Check whether each combination of loops is fusable
+ for (int i = 0; i < LoopCount; i++) {
+ for (int j = i + 1; j < LoopCount; j++) {
+ // Function to perform basic checks on the two loops
+ if (fuseCheck(LoopVector[j], LoopVector[i]))
+ // Function to perform fusing on the two loops
+ fuseBody(LoopVector[j], LoopVector[i], F);
+ break;
+ }
+ }
return false;
}
void fuseBody(Loop *Loop1, Loop *Loop2, Function &F) {
- BasicBlock *Body1 = nullptr;
- BasicBlock *Body2 = nullptr;
BasicBlock *Header1 = nullptr;
BasicBlock *Latch1 = nullptr;
BasicBlock *Exit2 = nullptr;
- Body1 = getBody(Loop1);
- Body2 = getBody(Loop2);
- Header1 = getHeader(Loop1);
- Latch1 = getLoopLatch(Loop1);
- Exit2 = getLoopExit(Loop2);
+ BasicBlock *Body1 = getBody(Loop1);
+ BasicBlock *Body2 = getBody(Loop2);
+ Header1 = Loop1->getHeader();
+ Latch1 = Loop1->getLoopLatch();
+ Exit2 = Loop2->getExitBlock();
+
+ assert(Body1 && Body2 && Header1 && Latch1 && Exit2 &&
+ "NULL Pointer encountered\n");
PHINode *Phi1 = Loop1->getCanonicalInductionVariable();
PHINode *Phi2 = Loop2->getCanonicalInductionVariable();
@@ -80,15 +93,21 @@ struct LoopFusion : public FunctionPass {
Phi2->replaceAllUsesWith(Phi1);
for (BasicBlock &BB : F) {
+
+ if (isa<ReturnInst>(BB.getTerminator()))
+ continue;
+ // Get the branch instruction of every block.
BranchInst *BI = dyn_cast<BranchInst>(BB.getTerminator());
+ // Set the successor of first Body block to Body of the second block.
if (&BB == Body1) {
BI->setSuccessor(0, Body2);
}
-
+ // Set the successor of second body block to Latch of the first block.
if (&BB == Body2) {
BI->setSuccessor(0, Latch1);
}
-
+ // Set the successor of the first header block to the exit of the second, as it
+ // contains the return instruction.
if (&BB == Header1) {
BI->setSuccessor(1, Exit2);
}
@@ -99,31 +118,16 @@ struct LoopFusion : public FunctionPass {
// Function to get Loop Body Blocks.
BasicBlock *getBody(Loop *L) {
+ BasicBlock *NullBB = nullptr;
for (BasicBlock *BB : L->getBlocks()) {
BasicBlock *HeaderBlock = L->getHeader();
if ((HeaderBlock != BB) && !(L->isLoopLatch(BB))) {
return BB;
}
}
- return {};
+ return NullBB;
}
- // Function to get Loop Header Blocks.
- BasicBlock *getHeader(Loop *L) { return L->getHeader(); }
-
- // Function to get Loop Latch Blocks.
- BasicBlock *getLoopLatch(Loop *L) {
- for (BasicBlock *BB : L->getBlocks()) {
- if (L->isLoopLatch(BB)) {
- return BB;
- }
- }
- return {};
- }
-
- // Function to get Loop exit blocks.
- BasicBlock *getLoopExit(Loop *L) { return L->getExitBlock(); }
-
bool adjacent(Loop *Loop1, Loop *Loop2) {
BasicBlock *Bb1 = Loop1->getExitBlock();
@@ -131,14 +135,6 @@ struct LoopFusion : public FunctionPass {
// If exit block and preHeader are not same.
if (Bb1 != Bb2) {
- if (Bb1->size() != 1)
- return false;
- if (Bb1->getTerminator()->getSuccessor(0) != Bb2)
- return false;
- if (Bb1 == nullptr || Bb2 == nullptr) {
- llvm::errs() << "NULL Pointer encountered\n";
- return false;
- }
return false;
}
return true;
@@ -147,48 +143,68 @@ struct LoopFusion : public FunctionPass {
// Helper function to check and fuse two loops.
bool fuseCheck(Loop *L1, Loop *L2) {
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Check if the two loops are adjacent.
if (!adjacent(L1, L2)) {
- llvm::errs() << "The two loops are not adjacent.CANNOT fuse\n";
+ llvm::errs() << "The two loops " << L1->getName() << " and "
+ << L2->getName() << " are not adjacent. CANNOT fuse.\n";
return false;
}
// Check if the start integer is same.
- if (startValue(*L1, *SE) != startValue(*L2, *SE)) {
- llvm::errs() << "The loop check starting value is not same.CANNOT fuse\n";
+ if (startValue(*L1) != startValue(*L2)) {
+ llvm::errs() << "The loop check starting values of 2 loops "
+ << L1->getName() << " and " << L2->getName()
+ << " are not same. CANNOT fuse.\n";
return false;
}
// Check if the limit integer is same.
if (limitValue(L1) != limitValue(L2)) {
- llvm::errs() << "The loop check limiting value is not same.CANNOT fuse\n";
+ llvm::errs() << "The loop check limiting value of 2 loops "
+ << L1->getName() << " and " << L2->getName()
+ << " are not same. CANNOT fuse.\n";
return false;
}
+ llvm::errs() << "The two loops " << *L1 << " and " << *L2
+ << " are being fused.\n";
return true;
}
// Check if the start value is same.
- int startValue(Loop &LoopV, ScalarEvolution &SE) {
+ Value *startValue(Loop &LoopV) {
for (auto &IndVar : LoopV.getHeader()->phis()) {
Value *V = IndVar.getOperand(1);
- auto startValue = dyn_cast<ConstantInt>(V);
- return startValue->getSExtValue();
+ return V;
}
- return {};
+ return nullptr;
}
// Check if the limit value is same.
Value *limitValue(Loop *LoopV) {
- Value *end;
- for (Use &U : LoopV->getHeader()->getFirstNonPHI()->operands()) {
- if (!dyn_cast<PHINode>(U.get())) {
- Instruction *I = dyn_cast<Instruction>(U.get());
- for (Use &U : I->operands())
- end = U.get();
+ Value *End, *ContEnd;
+
+ BasicBlock *BB = LoopV->getHeader();
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Check instruction is compare
+ if (isa<ICmpInst>(I)) {
+ ContEnd = I->getOperand(1);
+ // Check end value is constant
+ if (dyn_cast<Constant>(ContEnd)) {
+ return ContEnd;
+ } else {
+ for (Use &U : LoopV->getHeader()->getFirstNonPHI()->operands()) {
+ if (!dyn_cast<PHINode>(U.get())) {
+ Instruction *I = dyn_cast<Instruction>(U.get());
+ for (Use &U : I->operands())
+ End = U.get();
+ return End;
+ }
+ }
+ }
}
}
- return end;
+
+ return nullptr;
}
// We don't modify the program, so we preserve all analyses.
diff --git a/tests/loop_fuse/.init.dot b/tests/loop_fuse/.init.dot
index cecd6d2978d441..7850606709b6c5 100644
--- a/tests/loop_fuse/.init.dot
+++ b/tests/loop_fuse/.init.dot
@@ -1,16 +1,34 @@
digraph "CFG for 'init' function" {
label="CFG for 'init' function";
- Node0x558a60718b60 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{entry:\l %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)\l %wide.trip.count = zext i32 %smax to i64\l br label %for.cond\l}"];
- Node0x558a60718b60 -> Node0x558a607194d0;
- Node0x558a607194d0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond: \l %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]\l %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count\l br i1 %exitcond, label %for.body, label %for.end16\l|{<s0>T|<s1>F}}"];
- Node0x558a607194d0:s0 -> Node0x558a60719900;
- Node0x558a607194d0:s1 -> Node0x558a607199e0;
- Node0x558a60719900 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body: \l %0 = shl nuw nsw i64 %indvars.iv, 1\l %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %1 = trunc i64 %0 to i32\l store i32 %1, i32* %arrayidx, align 4\l %2 = trunc i64 %indvars.iv to i32\l %mul = mul nsw i32 %2, %2\l %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l store i32 %mul, i32* %arrayidx2, align 4\l br label %for.body6\l}"];
- Node0x558a60719900 -> Node0x558a6071a270;
- Node0x558a60719680 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l br label %for.cond, !llvm.loop !4\l}"];
- Node0x558a60719680 -> Node0x558a607194d0;
- Node0x558a6071a270 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body6: \l %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l %i = load i32, i32* %arrayidx8, align 4\l %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %i1 = load i32, i32* %arrayidx10, align 4\l %add11 = add nsw i32 %i, %i1\l %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv\l store i32 %add11, i32* %arrayidx13, align 4\l br label %for.inc\l}"];
- Node0x558a6071a270 -> Node0x558a60719680;
- Node0x558a607199e0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end16: \l ret void\l}"];
+ Node0x560bdf31b3a0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{entry:\l %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)\l %wide.trip.count = zext i32 %smax to i64\l br label %for.cond\l}"];
+ Node0x560bdf31b3a0 -> Node0x560bdf31bd10;
+ Node0x560bdf31bd10 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond: \l %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]\l %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count\l br i1 %exitcond, label %for.body, label %for.end\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31bd10:s0 -> Node0x560bdf31c140;
+ Node0x560bdf31bd10:s1 -> Node0x560bdf31c220;
+ Node0x560bdf31c140 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body: \l %0 = shl nuw nsw i64 %indvars.iv, 1\l %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %1 = trunc i64 %0 to i32\l store i32 %1, i32* %arrayidx, align 4\l %2 = trunc i64 %indvars.iv to i32\l %mul = mul nsw i32 %2, %2\l %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l store i32 %mul, i32* %arrayidx2, align 4\l br label %for.inc\l}"];
+ Node0x560bdf31c140 -> Node0x560bdf31bec0;
+ Node0x560bdf31bec0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l br label %for.cond, !llvm.loop !4\l}"];
+ Node0x560bdf31bec0 -> Node0x560bdf31bd10;
+ Node0x560bdf31c220 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end: \l br label %for.cond4\l}"];
+ Node0x560bdf31c220 -> Node0x560bdf31d730;
+ Node0x560bdf31d730 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond4: \l %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]\l %exitcond11 = icmp ne i64 %indvars.iv8, 10\l br i1 %exitcond11, label %for.body6, label %for.end30\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31d730:s0 -> Node0x560bdf31da60;
+ Node0x560bdf31d730:s1 -> Node0x560bdf31dae0;
+ Node0x560bdf31da60 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body6: \l %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8\l %i = load i32, i32* %arrayidx8, align 4\l %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8\l %i1 = load i32, i32* %arrayidx10, align 4\l %add11 = add nsw i32 %i, %i1\l %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8\l store i32 %add11, i32* %arrayidx13, align 4\l br label %for.body20\l}"];
+ Node0x560bdf31da60 -> Node0x560bdf31e460;
+ Node0x560bdf31d8a0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc14: \l %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1\l br label %for.cond4, !llvm.loop !6\l}"];
+ Node0x560bdf31d8a0 -> Node0x560bdf31d730;
+ Node0x560bdf31e460 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body20: \l %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8\l %i2 = load i32, i32* %arrayidx22, align 4\l %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8\l %i3 = load i32, i32* %arrayidx24, align 4\l %add25 = add nsw i32 %i2, %i3\l %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8\l store i32 %add25, i32* %arrayidx27, align 4\l br label %for.inc14\l}"];
+ Node0x560bdf31e460 -> Node0x560bdf31d8a0;
+ Node0x560bdf31dae0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end30: \l %wide.trip.count19 = zext i32 %smax to i64\l br label %for.cond32\l}"];
+ Node0x560bdf31dae0 -> Node0x560bdf31ed40;
+ Node0x560bdf31ed40 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond32: \l %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]\l %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19\l br i1 %exitcond20, label %for.body34, label %for.end44\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31ed40:s0 -> Node0x560bdf31f070;
+ Node0x560bdf31ed40:s1 -> Node0x560bdf31f0c0;
+ Node0x560bdf31f070 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body34: \l %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16\l %i4 = load i32, i32* %arrayidx36, align 4\l %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16\l %i5 = load i32, i32* %arrayidx38, align 4\l %add39 = add nsw i32 %i4, %i5\l %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16\l store i32 %add39, i32* %arrayidx41, align 4\l br label %for.inc42\l}"];
+ Node0x560bdf31f070 -> Node0x560bdf31eeb0;
+ Node0x560bdf31eeb0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc42: \l %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1\l br label %for.cond32, !llvm.loop !7\l}"];
+ Node0x560bdf31eeb0 -> Node0x560bdf31ed40;
+ Node0x560bdf31f0c0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end44: \l ret void\l}"];
}
diff --git a/tests/loop_fuse/loop_fuse.c b/tests/loop_fuse/loop_fuse.c
index e0189dd76ec488..fc29abd9cdd2a3 100644
--- a/tests/loop_fuse/loop_fuse.c
+++ b/tests/loop_fuse/loop_fuse.c
@@ -4,6 +4,14 @@ void init(int *a, int *b, int *c, int n) {
b[i] = i * i;
}
+ for (int i = 0; i < 10; i++) {
+ a[i] = b[i] + c[i];
+ }
+
+ for (int i = 0; i < 10; i++) {
+ a[i] = b[i] + c[i];
+ }
+
for (int i = 0; i < n; i++) {
a[i] = b[i] + c[i];
}
diff --git a/tests/loop_fuse/loop_fuse.ll b/tests/loop_fuse/loop_fuse.ll
index 5bdecfd993a5e7..50535e86c160b3 100644
--- a/tests/loop_fuse/loop_fuse.ll
+++ b/tests/loop_fuse/loop_fuse.ll
@@ -12,6 +12,8 @@ entry:
%n.addr = alloca i32, align 4
%i = alloca i32, align 4
%i3 = alloca i32, align 4
+ %i17 = alloca i32, align 4
+ %i31 = alloca i32, align 4
store i32* %a, i32** %a.addr, align 8
store i32* %b, i32** %b.addr, align 8
store i32* %c, i32** %c.addr, align 8
@@ -56,36 +58,104 @@ for.end: ; preds = %for.cond
for.cond4: ; preds = %for.inc14, %for.end
%11 = load i32, i32* %i3, align 4
- %12 = load i32, i32* %n.addr, align 4
- %cmp5 = icmp slt i32 %11, %12
+ %cmp5 = icmp slt i32 %11, 10
br i1 %cmp5, label %for.body6, label %for.end16
for.body6: ; preds = %for.cond4
- %13 = load i32*, i32** %b.addr, align 8
- %14 = load i32, i32* %i3, align 4
- %idxprom7 = sext i32 %14 to i64
- %arrayidx8 = getelementptr inbounds i32, i32* %13, i64 %idxprom7
- %15 = load i32, i32* %arrayidx8, align 4
- %16 = load i32*, i32** %c.addr, align 8
- %17 = load i32, i32* %i3, align 4
- %idxprom9 = sext i32 %17 to i64
- %arrayidx10 = getelementptr inbounds i32, i32* %16, i64 %idxprom9
- %18 = load i32, i32* %arrayidx10, align 4
- %add11 = add nsw i32 %15, %18
- %19 = load i32*, i32** %a.addr, align 8
- %20 = load i32, i32* %i3, align 4
- %idxprom12 = sext i32 %20 to i64
- %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12
+ %12 = load i32*, i32** %b.addr, align 8
+ %13 = load i32, i32* %i3, align 4
+ %idxprom7 = sext i32 %13 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %12, i64 %idxprom7
+ %14 = load i32, i32* %arrayidx8, align 4
+ %15 = load i32*, i32** %c.addr, align 8
+ %16 = load i32, i32* %i3, align 4
+ %idxprom9 = sext i32 %16 to i64
+ %arrayidx10 = getelementptr inbounds i32, i32* %15, i64 %idxprom9
+ %17 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %14, %17
+ %18 = load i32*, i32** %a.addr, align 8
+ %19 = load i32, i32* %i3, align 4
+ %idxprom12 = sext i32 %19 to i64
+ %arrayidx13 = getelementptr inbounds i32, i32* %18, i64 %idxprom12
store i32 %add11, i32* %arrayidx13, align 4
br label %for.inc14
for.inc14: ; preds = %for.body6
- %21 = load i32, i32* %i3, align 4
- %inc15 = add nsw i32 %21, 1
+ %20 = load i32, i32* %i3, align 4
+ %inc15 = add nsw i32 %20, 1
store i32 %inc15, i32* %i3, align 4
br label %for.cond4, !llvm.loop !6
for.end16: ; preds = %for.cond4
+ store i32 0, i32* %i17, align 4
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc28, %for.end16
+ %21 = load i32, i32* %i17, align 4
+ %cmp19 = icmp slt i32 %21, 10
+ br i1 %cmp19, label %for.body20, label %for.end30
+
+for.body20: ; preds = %for.cond18
+ %22 = load i32*, i32** %b.addr, align 8
+ %23 = load i32, i32* %i17, align 4
+ %idxprom21 = sext i32 %23 to i64
+ %arrayidx22 = getelementptr inbounds i32, i32* %22, i64 %idxprom21
+ %24 = load i32, i32* %arrayidx22, align 4
+ %25 = load i32*, i32** %c.addr, align 8
+ %26 = load i32, i32* %i17, align 4
+ %idxprom23 = sext i32 %26 to i64
+ %arrayidx24 = getelementptr inbounds i32, i32* %25, i64 %idxprom23
+ %27 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %24, %27
+ %28 = load i32*, i32** %a.addr, align 8
+ %29 = load i32, i32* %i17, align 4
+ %idxprom26 = sext i32 %29 to i64
+ %arrayidx27 = getelementptr inbounds i32, i32* %28, i64 %idxprom26
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body20
+ %30 = load i32, i32* %i17, align 4
+ %inc29 = add nsw i32 %30, 1
+ store i32 %inc29, i32* %i17, align 4
+ br label %for.cond18, !llvm.loop !7
+
+for.end30: ; preds = %for.cond18
+ store i32 0, i32* %i31, align 4
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %31 = load i32, i32* %i31, align 4
+ %32 = load i32, i32* %n.addr, align 4
+ %cmp33 = icmp slt i32 %31, %32
+ br i1 %cmp33, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %33 = load i32*, i32** %b.addr, align 8
+ %34 = load i32, i32* %i31, align 4
+ %idxprom35 = sext i32 %34 to i64
+ %arrayidx36 = getelementptr inbounds i32, i32* %33, i64 %idxprom35
+ %35 = load i32, i32* %arrayidx36, align 4
+ %36 = load i32*, i32** %c.addr, align 8
+ %37 = load i32, i32* %i31, align 4
+ %idxprom37 = sext i32 %37 to i64
+ %arrayidx38 = getelementptr inbounds i32, i32* %36, i64 %idxprom37
+ %38 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %35, %38
+ %39 = load i32*, i32** %a.addr, align 8
+ %40 = load i32, i32* %i31, align 4
+ %idxprom40 = sext i32 %40 to i64
+ %arrayidx41 = getelementptr inbounds i32, i32* %39, i64 %idxprom40
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %41 = load i32, i32* %i31, align 4
+ %inc43 = add nsw i32 %41, 1
+ store i32 %inc43, i32* %i31, align 4
+ br label %for.cond32, !llvm.loop !8
+
+for.end44: ; preds = %for.cond32
ret void
}
@@ -97,7 +167,9 @@ attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vec
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
+!8 = distinct !{!8, !5}
diff --git a/tests/loop_fuse/loop_fuse_out.ll b/tests/loop_fuse/loop_fuse_out.ll
index 5f5cde757646c6..82f6059af1e8df 100644
--- a/tests/loop_fuse/loop_fuse_out.ll
+++ b/tests/loop_fuse/loop_fuse_out.ll
@@ -31,29 +31,73 @@ for.inc: ; preds = %for.body
br label %for.cond, !llvm.loop !4
for.end: ; preds = %for.cond
- %wide.trip.count7 = zext i32 %smax to i64
br label %for.cond4
for.cond4: ; preds = %for.inc14, %for.end
- %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 0, %for.end ]
- %exitcond8 = icmp ne i64 %indvars.iv4, %wide.trip.count7
- br i1 %exitcond8, label %for.body6, label %for.end16
+ %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]
+ %exitcond11 = icmp ne i64 %indvars.iv8, 10
+ br i1 %exitcond11, label %for.body6, label %for.end16
for.body6: ; preds = %for.cond4
- %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
%i = load i32, i32* %arrayidx8, align 4
- %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
%i1 = load i32, i32* %arrayidx10, align 4
%add11 = add nsw i32 %i, %i1
- %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
store i32 %add11, i32* %arrayidx13, align 4
br label %for.inc14
for.inc14: ; preds = %for.body6
- %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
br label %for.cond4, !llvm.loop !6
for.end16: ; preds = %for.cond4
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc28, %for.end16
+ %indvars.iv12 = phi i64 [ %indvars.iv.next13, %for.inc28 ], [ 0, %for.end16 ]
+ %exitcond15 = icmp ne i64 %indvars.iv12, 10
+ br i1 %exitcond15, label %for.body20, label %for.end30
+
+for.body20: ; preds = %for.cond18
+ %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv12
+ %i2 = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv12
+ %i3 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %i2, %i3
+ %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv12
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body20
+ %indvars.iv.next13 = add nuw nsw i64 %indvars.iv12, 1
+ br label %for.cond18, !llvm.loop !7
+
+for.end30: ; preds = %for.cond18
+ %wide.trip.count19 = zext i32 %smax to i64
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]
+ %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19
+ br i1 %exitcond20, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16
+ %i4 = load i32, i32* %arrayidx36, align 4
+ %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16
+ %i5 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %i4, %i5
+ %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ br label %for.cond32, !llvm.loop !8
+
+for.end44: ; preds = %for.cond32
ret void
}
@@ -69,7 +113,9 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
+!8 = distinct !{!8, !5}
diff --git a/tests/loop_fuse/loop_fuse_out1.ll b/tests/loop_fuse/loop_fuse_out1.ll
index bd15857dfeb13a..330dd11d463ab2 100644
--- a/tests/loop_fuse/loop_fuse_out1.ll
+++ b/tests/loop_fuse/loop_fuse_out1.ll
@@ -13,7 +13,7 @@ entry:
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
- br i1 %exitcond, label %for.body, label %for.end16
+ br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%0 = shl nuw nsw i64 %indvars.iv, 1
@@ -24,23 +24,68 @@ for.body: ; preds = %for.cond
%mul = mul nsw i32 %2, %2
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
store i32 %mul, i32* %arrayidx2, align 4
- br label %for.body6
+ br label %for.inc
-for.inc: ; preds = %for.body6
+for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond, !llvm.loop !4
-for.body6: ; preds = %for.body
- %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+for.end: ; preds = %for.cond
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]
+ %exitcond11 = icmp ne i64 %indvars.iv8, 10
+ br i1 %exitcond11, label %for.body6, label %for.end30
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
%i = load i32, i32* %arrayidx8, align 4
- %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
%i1 = load i32, i32* %arrayidx10, align 4
%add11 = add nsw i32 %i, %i1
- %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
store i32 %add11, i32* %arrayidx13, align 4
- br label %for.inc
+ br label %for.body20
+
+for.inc14: ; preds = %for.body20
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.body20: ; preds = %for.body6
+ %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
+ %i2 = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
+ %i3 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %i2, %i3
+ %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc14
+
+for.end30: ; preds = %for.cond4
+ %wide.trip.count19 = zext i32 %smax to i64
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]
+ %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19
+ br i1 %exitcond20, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16
+ %i4 = load i32, i32* %arrayidx36, align 4
+ %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16
+ %i5 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %i4, %i5
+ %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ br label %for.cond32, !llvm.loop !7
-for.end16: ; preds = %for.cond
+for.end44: ; preds = %for.cond32
ret void
}
@@ -56,6 +101,8 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse.ll b/tests/loop_fuse/negative_loop_fuse.ll
index a5646e46627d58..cf38ff8b6ff8e9 100644
--- a/tests/loop_fuse/negative_loop_fuse.ll
+++ b/tests/loop_fuse/negative_loop_fuse.ll
@@ -97,7 +97,7 @@ attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vec
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out.ll b/tests/loop_fuse/negative_loop_fuse_out.ll
index 100883a6e9d56e..b3fdb2fa9918a1 100644
--- a/tests/loop_fuse/negative_loop_fuse_out.ll
+++ b/tests/loop_fuse/negative_loop_fuse_out.ll
@@ -70,7 +70,7 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out1.ll b/tests/loop_fuse/negative_loop_fuse_out1.ll
index 1eabb624ad1dc4..a63151ac6cda1d 100644
--- a/tests/loop_fuse/negative_loop_fuse_out1.ll
+++ b/tests/loop_fuse/negative_loop_fuse_out1.ll
@@ -70,7 +70,7 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
-!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 7f049514ee22563de5f8817412efd6d7d83109cf)"}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
!6 = distinct !{!6, !5}
>From 307d24efdf6607d8699747788f03bf0bbf1f3e0a Mon Sep 17 00:00:00 2001
From: Shravan Kumar <87087331+shravankumar0811 at users.noreply.github.com>
Date: Wed, 13 Jul 2022 22:03:01 +0530
Subject: [PATCH 7/8] Adding Loop Fusion pass (#3)
* Adding Loop Fusion pass
* Adding Loop Fusion pass
Co-authored-by: Shravan Kumar <shkumar at habana.ai>
---
llvm/lib/Transforms/CMakeLists.txt | 3 +-
llvm/lib/Transforms/LoopFusion/CMakeLists.txt | 20 ++
llvm/lib/Transforms/LoopFusion/LoopFusion.cpp | 222 ++++++++++++++++++
.../Transforms/LoopFusion/LoopFusion.exports | 0
tests/CMakeLists.txt | 1 +
tests/loop_fuse/.init.dot | 34 +++
tests/loop_fuse/command.sh | 11 +
tests/loop_fuse/loop_fuse.c | 18 ++
tests/loop_fuse/loop_fuse.ll | 175 ++++++++++++++
tests/loop_fuse/loop_fuse_out.ll | 121 ++++++++++
tests/loop_fuse/loop_fuse_out1.ll | 108 +++++++++
tests/loop_fuse/negative_loop_fuse.c | 10 +
tests/loop_fuse/negative_loop_fuse.ll | 103 ++++++++
tests/loop_fuse/negative_loop_fuse_out.ll | 76 ++++++
tests/loop_fuse/negative_loop_fuse_out1.ll | 76 ++++++
15 files changed, 977 insertions(+), 1 deletion(-)
create mode 100644 llvm/lib/Transforms/LoopFusion/CMakeLists.txt
create mode 100644 llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
create mode 100644 llvm/lib/Transforms/LoopFusion/LoopFusion.exports
create mode 100644 tests/loop_fuse/.init.dot
create mode 100644 tests/loop_fuse/command.sh
create mode 100644 tests/loop_fuse/loop_fuse.c
create mode 100644 tests/loop_fuse/loop_fuse.ll
create mode 100644 tests/loop_fuse/loop_fuse_out.ll
create mode 100644 tests/loop_fuse/loop_fuse_out1.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse.c
create mode 100644 tests/loop_fuse/negative_loop_fuse.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse_out.ll
create mode 100644 tests/loop_fuse/negative_loop_fuse_out1.ll
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 8ace411e1ca82b..6b165fd71dfcfb 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -10,4 +10,5 @@ add_subdirectory(ObjCARC)
add_subdirectory(Coroutines)
add_subdirectory(CFGuard)
add_subdirectory(Cfcss)
-add_subdirectory(Scev)
\ No newline at end of file
+add_subdirectory(Scev)
+add_subdirectory(LoopFusion)
diff --git a/llvm/lib/Transforms/LoopFusion/CMakeLists.txt b/llvm/lib/Transforms/LoopFusion/CMakeLists.txt
new file mode 100644
index 00000000000000..6c0edac49c6ec2
--- /dev/null
+++ b/llvm/lib/Transforms/LoopFusion/CMakeLists.txt
@@ -0,0 +1,20 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the LoopFusion plugin (LoopFusion.exports is an empty export list).
+if( NOT LLVM_REQUIRES_RTTI )
+ if( NOT LLVM_REQUIRES_EH )
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/LoopFusion.exports)
+ endif()
+endif()
+
+if(WIN32 OR CYGWIN) # link components are only listed explicitly on these platforms
+ set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
+add_llvm_library( LLVMLoopFusion MODULE BUILDTREE_ONLY
+LoopFusion.cpp
+
+ DEPENDS
+ intrinsics_gen
+ PLUGIN_TOOL
+ opt
+ )
diff --git a/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
new file mode 100644
index 00000000000000..d3890e8c00009b
--- /dev/null
+++ b/llvm/lib/Transforms/LoopFusion/LoopFusion.cpp
@@ -0,0 +1,222 @@
+/*===- LoopFusion.cpp -
+ This program is the implementation of a pass for loop fusion in the LLVM compiler.
+Two loops that are adjacent and have the same condition and increments with
+respect to the loop variable may be fused, i.e., their bodies may be executed one
+after the other within a single loop. The decision to fuse the loops is taken
+based on the legality and profitability of the fusion. It should not be
+performed if the resulting code has an anti-dependency or if the execution time of
+the program increases. Algorithm:
+1. Check whether the 2 loops can be fused.
+2. Replace the uses of the induction variable of the 2nd loop with that of the 1st loop.
+3. Combine the bodies of loop1 and loop2.
+4. Set the successor of the 1st loop's header to the exit block of the 2nd loop.
+5. Delete the unwanted basic blocks of the 2nd loop.
+===-------------------------------------------------------------------------------------------===*/
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "hello"
+
+namespace {
+/// LoopFusion - Legacy pass-manager function pass that fuses pairs of adjacent
+/// top-level loops whose induction variables start at the same value and run
+/// to the same limit.
+///
+/// Only the simplest loop shape is handled: header -> single body block ->
+/// latch, with a canonical induction variable. The second loop's body is
+/// spliced between the first loop's body and latch; the remaining blocks of
+/// the second loop are then removed as unreachable code.
+///
+/// NOTE(review): no memory-dependence or profitability analysis is performed,
+/// the compare predicates of the two loops are not compared, and the block
+/// between the two loops is removed (with any instructions it holds) once it
+/// becomes unreachable. Confirm these restrictions are acceptable before
+/// running the pass on anything but the bundled tests.
+struct LoopFusion : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  LoopFusion() : FunctionPass(ID) {}
+
+  /// Tries to fuse every pair of top-level loops in \p F.
+  ///
+  /// \returns true if at least one pair was fused, i.e. the IR was modified.
+  bool runOnFunction(Function &F) override {
+    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+    // Work on a snapshot of the top-level loops; LoopInfo itself is not
+    // updated while the CFG is being rewritten.
+    SmallVector<Loop *> LoopVector(LI->begin(), LI->end());
+
+    const size_t LoopCount = LoopVector.size();
+    if (LoopCount < 2) {
+      llvm::errs() << "The program contains less no of loops to fuse\n";
+      return false;
+    }
+
+    bool Changed = false;
+
+    // Check for each combinations of loops are fusable. LoopVector[j] plays
+    // the role of the first loop and LoopVector[i] the role of the second one.
+    for (size_t i = 0; i < LoopCount; i++) {
+      for (size_t j = i + 1; j < LoopCount; j++) {
+        // Function to perform basic checks on the two loops
+        if (!fuseCheck(LoopVector[j], LoopVector[i]))
+          continue;
+
+        // Function to perform fusing on the two loops
+        fuseBody(LoopVector[j], LoopVector[i], F);
+        Changed = true;
+
+        // LoopVector[i] has been merged into LoopVector[j] and its blocks are
+        // gone, so it must not be examined again. Later outer iterations only
+        // ever look at indices greater than i.
+        break;
+      }
+    }
+
+    return Changed;
+  }
+
+  /// Splices the body of \p Loop2 into \p Loop1 and deletes what is left of
+  /// \p Loop2.
+  ///
+  /// Expects a pair accepted by fuseCheck(). If a required block, induction
+  /// variable or branch is missing, the function returns without touching
+  /// the IR.
+  void fuseBody(Loop *Loop1, Loop *Loop2, Function &F) {
+    BasicBlock *Body1 = getBody(Loop1);
+    BasicBlock *Body2 = getBody(Loop2);
+    BasicBlock *Header1 = Loop1->getHeader();
+    BasicBlock *Latch1 = Loop1->getLoopLatch();
+    BasicBlock *Exit2 = Loop2->getExitBlock();
+    PHINode *Phi1 = Loop1->getCanonicalInductionVariable();
+    PHINode *Phi2 = Loop2->getCanonicalInductionVariable();
+
+    assert(Body1 && Body2 && Header1 && Latch1 && Exit2 && Phi1 && Phi2 &&
+           "NULL Pointer encountered\n");
+    // Asserts vanish in release builds, so bail out explicitly as well.
+    if (!Body1 || !Body2 || !Header1 || !Latch1 || !Exit2 || !Phi1 || !Phi2)
+      return;
+
+    // Resolve every branch that will be rewritten before mutating anything,
+    // so a surprise terminator cannot leave the function half-transformed.
+    auto *Body1Br = dyn_cast<BranchInst>(Body1->getTerminator());
+    auto *Body2Br = dyn_cast<BranchInst>(Body2->getTerminator());
+    auto *Header1Br = dyn_cast<BranchInst>(Header1->getTerminator());
+    if (!Body1Br || !Body2Br || !Header1Br || !Header1Br->isConditional())
+      return;
+
+    // Replace the use of induction variable of 2nd loop with that of 1st loop.
+    Phi2->replaceAllUsesWith(Phi1);
+
+    // Set the successor of first Body block to Body of the second block.
+    Body1Br->setSuccessor(0, Body2);
+    // Set the successor of second body block to Latch of the first block.
+    Body2Br->setSuccessor(0, Latch1);
+    // Leave the fused loop through the exit block of the second loop.
+    Header1Br->setSuccessor(1, Exit2);
+
+    // Function to remove un-wanted basic blocks.
+    EliminateUnreachableBlocks(F);
+  }
+
+  /// Returns the first block of \p L that is neither its header nor a latch,
+  /// or nullptr if there is no such block.
+  BasicBlock *getBody(Loop *L) {
+    BasicBlock *HeaderBlock = L->getHeader();
+    for (BasicBlock *BB : L->getBlocks()) {
+      if ((HeaderBlock != BB) && !(L->isLoopLatch(BB))) {
+        return BB;
+      }
+    }
+    return nullptr;
+  }
+
+  /// Returns true if the unique exit block of \p Loop1 is the preheader of
+  /// \p Loop2. A missing exit block or preheader is never "adjacent".
+  bool adjacent(Loop *Loop1, Loop *Loop2) {
+    BasicBlock *Exit1 = Loop1->getExitBlock();
+    BasicBlock *Preheader2 = Loop2->getLoopPreheader();
+
+    // Without the null test two loops that lack both blocks would compare
+    // equal (nullptr == nullptr) and be reported as adjacent.
+    return Exit1 && Exit1 == Preheader2;
+  }
+
+  /// Returns true if \p L has exactly the shape fuseBody() can rewrite:
+  /// three blocks (header, body, latch), a canonical induction variable, a
+  /// conditional header branch whose successor 0 is the body and whose
+  /// successor 1 leaves the loop, and a body that branches unconditionally
+  /// to the latch.
+  bool isSimpleLoop(Loop *L) {
+    BasicBlock *Header = L->getHeader();
+    BasicBlock *Latch = L->getLoopLatch();
+    BasicBlock *Body = getBody(L);
+    if (!Header || !Latch || !Body || L->getNumBlocks() != 3)
+      return false;
+    if (!L->getCanonicalInductionVariable())
+      return false;
+
+    auto *HeaderBr = dyn_cast<BranchInst>(Header->getTerminator());
+    auto *BodyBr = dyn_cast<BranchInst>(Body->getTerminator());
+    if (!HeaderBr || !BodyBr)
+      return false;
+
+    if (!HeaderBr->isConditional() || HeaderBr->getSuccessor(0) != Body ||
+        L->contains(HeaderBr->getSuccessor(1)))
+      return false;
+
+    return BodyBr->isUnconditional() && BodyBr->getSuccessor(0) == Latch;
+  }
+
+  /// Returns true if fuseBody() can rewire \p L1 and \p L2 without producing
+  /// invalid IR.
+  bool canRewire(Loop *L1, Loop *L2) {
+    if (!isSimpleLoop(L1) || !isSimpleLoop(L2))
+      return false;
+
+    // replaceAllUsesWith requires both induction variables to share a type.
+    if (L1->getCanonicalInductionVariable()->getType() !=
+        L2->getCanonicalInductionVariable()->getType())
+      return false;
+
+    // The exit block of the second loop gets a new predecessor; PHI nodes in
+    // it would have to be patched, which fuseBody() does not do.
+    BasicBlock *Exit2 = L2->getExitBlock();
+    return Exit2 && !isa<PHINode>(Exit2->front());
+  }
+
+  /// Decides whether \p L1 (first loop) and \p L2 (second loop) may be fused
+  /// and reports the decision on stderr.
+  bool fuseCheck(Loop *L1, Loop *L2) {
+
+    // Check if the two loops are adjacent.
+    if (!adjacent(L1, L2)) {
+      llvm::errs() << "The two loops " << L1->getName() << " and "
+                   << L2->getName() << " are not adjacent. CANNOT fuse.\n";
+      return false;
+    }
+
+    // Check if the start integer is same. An unknown start never matches.
+    Value *Start1 = startValue(*L1);
+    if (!Start1 || Start1 != startValue(*L2)) {
+      llvm::errs() << "The loop check starting values of 2 loops "
+                   << L1->getName() << " and " << L2->getName()
+                   << " are not same. CANNOT fuse.\n";
+      return false;
+    }
+
+    // Check if the limit integer is same. An unknown limit never matches.
+    Value *Limit1 = limitValue(L1);
+    if (!Limit1 || Limit1 != limitValue(L2)) {
+      llvm::errs() << "The loop check limiting value of 2 loops "
+                   << L1->getName() << " and " << L2->getName()
+                   << " are not same. CANNOT fuse.\n";
+      return false;
+    }
+
+    // Check that both loops have the only shape fuseBody() knows how to
+    // rewrite.
+    if (!canRewire(L1, L2)) {
+      llvm::errs() << "The two loops " << L1->getName() << " and "
+                   << L2->getName()
+                   << " do not have a shape this pass can rewrite. "
+                      "CANNOT fuse.\n";
+      return false;
+    }
+
+    llvm::errs() << "The two loops " << *L1 << " and " << *L2
+                 << " are being fused.\n";
+    return true;
+  }
+
+  /// Returns the value the first header PHI of \p LoopV receives from the
+  /// loop preheader, or nullptr if the loop has no preheader or the header
+  /// does not start with a PHI.
+  Value *startValue(Loop &LoopV) {
+    BasicBlock *Preheader = LoopV.getLoopPreheader();
+    auto *FirstPhi = dyn_cast<PHINode>(&LoopV.getHeader()->front());
+    if (!Preheader || !FirstPhi)
+      return nullptr;
+
+    // Ask for the preheader edge explicitly; the order of PHI operands is
+    // not fixed, so a hard-coded operand index may pick the back-edge value.
+    return FirstPhi->getIncomingValueForBlock(Preheader);
+  }
+
+  /// Returns the value the loop counter of \p LoopV is compared against in
+  /// the header, or nullptr if the header contains no usable integer compare.
+  ///
+  /// A constant right-hand side is returned as is. Otherwise the first
+  /// non-PHI operand of the compare is used; if it is a cast, its source
+  /// operand is returned, so that two separate casts of the same value
+  /// (as in the bundled test's `zext i32 %smax to i64`) compare equal.
+  ///
+  /// NOTE(review): different cast kinds of one source value (zext vs. sext)
+  /// are not told apart.
+  Value *limitValue(Loop *LoopV) {
+    for (Instruction &Inst : *LoopV->getHeader()) {
+      // Check instruction is compare
+      auto *Cmp = dyn_cast<ICmpInst>(&Inst);
+      if (!Cmp)
+        continue;
+
+      // Check end value is constant
+      Value *Bound = Cmp->getOperand(1);
+      if (isa<Constant>(Bound))
+        return Bound;
+
+      for (Value *Op : Cmp->operands()) {
+        if (isa<PHINode>(Op))
+          continue;
+        if (auto *Cast = dyn_cast<CastInst>(Op))
+          return Cast->getOperand(0);
+        // Arguments, globals and non-cast instructions are compared by
+        // identity.
+        return Op;
+      }
+    }
+
+    return nullptr;
+  }
+
+  /// Declares the analyses this pass needs. Nothing is marked as preserved
+  /// because a successful fusion rewrites the CFG and deletes blocks.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
+};
+} // namespace
+
+char LoopFusion::ID = 0; // definition of the static pass ID declared in LoopFusion
+static RegisterPass<LoopFusion>
+ X("loopfusion", // pass argument; tests/loop_fuse/command.sh runs it as `opt -loopfusion`
+ "LoopFusion Implementation Pass (with getAnalysisUsage implemented)"); // human-readable pass description
\ No newline at end of file
diff --git a/llvm/lib/Transforms/LoopFusion/LoopFusion.exports b/llvm/lib/Transforms/LoopFusion/LoopFusion.exports
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 0ab0d081f586b9..2321ce7a8e9ce7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,3 +1,4 @@
add_subdirectory(cfcss)
add_subdirectory(assignment1)
add_subdirectory(scev)
+add_subdirectory(loop_fuse)
diff --git a/tests/loop_fuse/.init.dot b/tests/loop_fuse/.init.dot
new file mode 100644
index 00000000000000..7850606709b6c5
--- /dev/null
+++ b/tests/loop_fuse/.init.dot
@@ -0,0 +1,34 @@
+digraph "CFG for 'init' function" {
+ label="CFG for 'init' function";
+
+ Node0x560bdf31b3a0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{entry:\l %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)\l %wide.trip.count = zext i32 %smax to i64\l br label %for.cond\l}"];
+ Node0x560bdf31b3a0 -> Node0x560bdf31bd10;
+ Node0x560bdf31bd10 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond: \l %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]\l %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count\l br i1 %exitcond, label %for.body, label %for.end\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31bd10:s0 -> Node0x560bdf31c140;
+ Node0x560bdf31bd10:s1 -> Node0x560bdf31c220;
+ Node0x560bdf31c140 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body: \l %0 = shl nuw nsw i64 %indvars.iv, 1\l %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv\l %1 = trunc i64 %0 to i32\l store i32 %1, i32* %arrayidx, align 4\l %2 = trunc i64 %indvars.iv to i32\l %mul = mul nsw i32 %2, %2\l %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv\l store i32 %mul, i32* %arrayidx2, align 4\l br label %for.inc\l}"];
+ Node0x560bdf31c140 -> Node0x560bdf31bec0;
+ Node0x560bdf31bec0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l br label %for.cond, !llvm.loop !4\l}"];
+ Node0x560bdf31bec0 -> Node0x560bdf31bd10;
+ Node0x560bdf31c220 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end: \l br label %for.cond4\l}"];
+ Node0x560bdf31c220 -> Node0x560bdf31d730;
+ Node0x560bdf31d730 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond4: \l %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]\l %exitcond11 = icmp ne i64 %indvars.iv8, 10\l br i1 %exitcond11, label %for.body6, label %for.end30\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31d730:s0 -> Node0x560bdf31da60;
+ Node0x560bdf31d730:s1 -> Node0x560bdf31dae0;
+ Node0x560bdf31da60 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body6: \l %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8\l %i = load i32, i32* %arrayidx8, align 4\l %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8\l %i1 = load i32, i32* %arrayidx10, align 4\l %add11 = add nsw i32 %i, %i1\l %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8\l store i32 %add11, i32* %arrayidx13, align 4\l br label %for.body20\l}"];
+ Node0x560bdf31da60 -> Node0x560bdf31e460;
+ Node0x560bdf31d8a0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc14: \l %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1\l br label %for.cond4, !llvm.loop !6\l}"];
+ Node0x560bdf31d8a0 -> Node0x560bdf31d730;
+ Node0x560bdf31e460 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body20: \l %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8\l %i2 = load i32, i32* %arrayidx22, align 4\l %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8\l %i3 = load i32, i32* %arrayidx24, align 4\l %add25 = add nsw i32 %i2, %i3\l %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8\l store i32 %add25, i32* %arrayidx27, align 4\l br label %for.inc14\l}"];
+ Node0x560bdf31e460 -> Node0x560bdf31d8a0;
+ Node0x560bdf31dae0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end30: \l %wide.trip.count19 = zext i32 %smax to i64\l br label %for.cond32\l}"];
+ Node0x560bdf31dae0 -> Node0x560bdf31ed40;
+ Node0x560bdf31ed40 [shape=record,color="#b70d28ff", style=filled, fillcolor="#b70d2870",label="{for.cond32: \l %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]\l %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19\l br i1 %exitcond20, label %for.body34, label %for.end44\l|{<s0>T|<s1>F}}"];
+ Node0x560bdf31ed40:s0 -> Node0x560bdf31f070;
+ Node0x560bdf31ed40:s1 -> Node0x560bdf31f0c0;
+ Node0x560bdf31f070 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.body34: \l %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16\l %i4 = load i32, i32* %arrayidx36, align 4\l %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16\l %i5 = load i32, i32* %arrayidx38, align 4\l %add39 = add nsw i32 %i4, %i5\l %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16\l store i32 %add39, i32* %arrayidx41, align 4\l br label %for.inc42\l}"];
+ Node0x560bdf31f070 -> Node0x560bdf31eeb0;
+ Node0x560bdf31eeb0 [shape=record,color="#b70d28ff", style=filled, fillcolor="#bb1b2c70",label="{for.inc42: \l %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1\l br label %for.cond32, !llvm.loop !7\l}"];
+ Node0x560bdf31eeb0 -> Node0x560bdf31ed40;
+ Node0x560bdf31f0c0 [shape=record,color="#3d50c3ff", style=filled, fillcolor="#b9d0f970",label="{for.end44: \l ret void\l}"];
+}
diff --git a/tests/loop_fuse/command.sh b/tests/loop_fuse/command.sh
new file mode 100644
index 00000000000000..aac1e1e99a0ee5
--- /dev/null
+++ b/tests/loop_fuse/command.sh
@@ -0,0 +1,11 @@
+# first command is to emit ir for test case
+clang -S -emit-llvm loop_fuse.c -Xclang -disable-O0-optnone
+
+#second command is to clean up ir so that scev can understand it
+opt -mem2reg -loop-simplify -instcombine -instnamer -indvars loop_fuse.ll -S -o loop_fuse_out.ll
+
+#Third command will run loopfusion
+opt -load ${LLVM_HOME}/build/lib/LLVMLoopFusion.so -loopfusion loop_fuse_out.ll -enable-new-pm=0 -S -o loop_fuse_out1.ll
+
+#To create cfg
+opt -analyze -dot-cfg -enable-new-pm=0 loop_fuse_out1.ll
\ No newline at end of file
diff --git a/tests/loop_fuse/loop_fuse.c b/tests/loop_fuse/loop_fuse.c
new file mode 100644
index 00000000000000..fc29abd9cdd2a3
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse.c
@@ -0,0 +1,18 @@
+void init(int *a, int *b, int *c, int n) { /* LoopFusion test input: four back-to-back counted loops */
+ for (int i = 0; i < n; i++) { /* loop 1: bound n */
+ c[i] = i + i;
+ b[i] = i * i;
+ }
+
+ for (int i = 0; i < 10; i++) { /* loop 2: bound 10 */
+ a[i] = b[i] + c[i];
+ }
+
+ for (int i = 0; i < 10; i++) { /* loop 3: same start and bound as loop 2; loop_fuse_out1.ll shows these two bodies chained in one loop */
+ a[i] = b[i] + c[i];
+ }
+
+ for (int i = 0; i < n; i++) { /* loop 4: bound n again */
+ a[i] = b[i] + c[i];
+ }
+}
\ No newline at end of file
diff --git a/tests/loop_fuse/loop_fuse.ll b/tests/loop_fuse/loop_fuse.ll
new file mode 100644
index 00000000000000..50535e86c160b3
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse.ll
@@ -0,0 +1,175 @@
+; ModuleID = 'loop_fuse.c'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %a.addr = alloca i32*, align 8
+ %b.addr = alloca i32*, align 8
+ %c.addr = alloca i32*, align 8
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %i3 = alloca i32, align 4
+ %i17 = alloca i32, align 4
+ %i31 = alloca i32, align 4
+ store i32* %a, i32** %a.addr, align 8
+ store i32* %b, i32** %b.addr, align 8
+ store i32* %c, i32** %c.addr, align 8
+ store i32 %n, i32* %n.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32*, i32** %c.addr, align 8
+ %5 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %6 = load i32, i32* %i, align 4
+ %7 = load i32, i32* %i, align 4
+ %mul = mul nsw i32 %6, %7
+ %8 = load i32*, i32** %b.addr, align 8
+ %9 = load i32, i32* %i, align 4
+ %idxprom1 = sext i32 %9 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %8, i64 %idxprom1
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %10 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i3, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %11 = load i32, i32* %i3, align 4
+ %cmp5 = icmp slt i32 %11, 10
+ br i1 %cmp5, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %12 = load i32*, i32** %b.addr, align 8
+ %13 = load i32, i32* %i3, align 4
+ %idxprom7 = sext i32 %13 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %12, i64 %idxprom7
+ %14 = load i32, i32* %arrayidx8, align 4
+ %15 = load i32*, i32** %c.addr, align 8
+ %16 = load i32, i32* %i3, align 4
+ %idxprom9 = sext i32 %16 to i64
+ %arrayidx10 = getelementptr inbounds i32, i32* %15, i64 %idxprom9
+ %17 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %14, %17
+ %18 = load i32*, i32** %a.addr, align 8
+ %19 = load i32, i32* %i3, align 4
+ %idxprom12 = sext i32 %19 to i64
+ %arrayidx13 = getelementptr inbounds i32, i32* %18, i64 %idxprom12
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %20 = load i32, i32* %i3, align 4
+ %inc15 = add nsw i32 %20, 1
+ store i32 %inc15, i32* %i3, align 4
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ store i32 0, i32* %i17, align 4
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc28, %for.end16
+ %21 = load i32, i32* %i17, align 4
+ %cmp19 = icmp slt i32 %21, 10
+ br i1 %cmp19, label %for.body20, label %for.end30
+
+for.body20: ; preds = %for.cond18
+ %22 = load i32*, i32** %b.addr, align 8
+ %23 = load i32, i32* %i17, align 4
+ %idxprom21 = sext i32 %23 to i64
+ %arrayidx22 = getelementptr inbounds i32, i32* %22, i64 %idxprom21
+ %24 = load i32, i32* %arrayidx22, align 4
+ %25 = load i32*, i32** %c.addr, align 8
+ %26 = load i32, i32* %i17, align 4
+ %idxprom23 = sext i32 %26 to i64
+ %arrayidx24 = getelementptr inbounds i32, i32* %25, i64 %idxprom23
+ %27 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %24, %27
+ %28 = load i32*, i32** %a.addr, align 8
+ %29 = load i32, i32* %i17, align 4
+ %idxprom26 = sext i32 %29 to i64
+ %arrayidx27 = getelementptr inbounds i32, i32* %28, i64 %idxprom26
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body20
+ %30 = load i32, i32* %i17, align 4
+ %inc29 = add nsw i32 %30, 1
+ store i32 %inc29, i32* %i17, align 4
+ br label %for.cond18, !llvm.loop !7
+
+for.end30: ; preds = %for.cond18
+ store i32 0, i32* %i31, align 4
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %31 = load i32, i32* %i31, align 4
+ %32 = load i32, i32* %n.addr, align 4
+ %cmp33 = icmp slt i32 %31, %32
+ br i1 %cmp33, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %33 = load i32*, i32** %b.addr, align 8
+ %34 = load i32, i32* %i31, align 4
+ %idxprom35 = sext i32 %34 to i64
+ %arrayidx36 = getelementptr inbounds i32, i32* %33, i64 %idxprom35
+ %35 = load i32, i32* %arrayidx36, align 4
+ %36 = load i32*, i32** %c.addr, align 8
+ %37 = load i32, i32* %i31, align 4
+ %idxprom37 = sext i32 %37 to i64
+ %arrayidx38 = getelementptr inbounds i32, i32* %36, i64 %idxprom37
+ %38 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %35, %38
+ %39 = load i32*, i32** %a.addr, align 8
+ %40 = load i32, i32* %i31, align 4
+ %idxprom40 = sext i32 %40 to i64
+ %arrayidx41 = getelementptr inbounds i32, i32* %39, i64 %idxprom40
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %41 = load i32, i32* %i31, align 4
+ %inc43 = add nsw i32 %41, 1
+ store i32 %inc43, i32* %i31, align 4
+ br label %for.cond32, !llvm.loop !8
+
+for.end44: ; preds = %for.cond32
+ ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
+!8 = distinct !{!8, !5}
diff --git a/tests/loop_fuse/loop_fuse_out.ll b/tests/loop_fuse/loop_fuse_out.ll
new file mode 100644
index 00000000000000..82f6059af1e8df
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse_out.ll
@@ -0,0 +1,121 @@
+; ModuleID = 'loop_fuse.ll'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]
+ %exitcond11 = icmp ne i64 %indvars.iv8, 10
+ br i1 %exitcond11, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ br label %for.cond18
+
+for.cond18: ; preds = %for.inc28, %for.end16
+ %indvars.iv12 = phi i64 [ %indvars.iv.next13, %for.inc28 ], [ 0, %for.end16 ]
+ %exitcond15 = icmp ne i64 %indvars.iv12, 10
+ br i1 %exitcond15, label %for.body20, label %for.end30
+
+for.body20: ; preds = %for.cond18
+ %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv12
+ %i2 = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv12
+ %i3 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %i2, %i3
+ %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv12
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc28
+
+for.inc28: ; preds = %for.body20
+ %indvars.iv.next13 = add nuw nsw i64 %indvars.iv12, 1
+ br label %for.cond18, !llvm.loop !7
+
+for.end30: ; preds = %for.cond18
+ %wide.trip.count19 = zext i32 %smax to i64
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]
+ %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19
+ br i1 %exitcond20, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16
+ %i4 = load i32, i32* %arrayidx36, align 4
+ %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16
+ %i5 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %i4, %i5
+ %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ br label %for.cond32, !llvm.loop !8
+
+for.end44: ; preds = %for.cond32
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
+!8 = distinct !{!8, !5}
diff --git a/tests/loop_fuse/loop_fuse_out1.ll b/tests/loop_fuse/loop_fuse_out1.ll
new file mode 100644
index 00000000000000..330dd11d463ab2
--- /dev/null
+++ b/tests/loop_fuse/loop_fuse_out1.ll
@@ -0,0 +1,108 @@
+; ModuleID = 'loop_fuse_out.ll'
+source_filename = "loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 0)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc14 ], [ 0, %for.end ]
+ %exitcond11 = icmp ne i64 %indvars.iv8, 10
+ br i1 %exitcond11, label %for.body6, label %for.end30
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.body20
+
+for.inc14: ; preds = %for.body20
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.body20: ; preds = %for.body6
+ %arrayidx22 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv8
+ %i2 = load i32, i32* %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv8
+ %i3 = load i32, i32* %arrayidx24, align 4
+ %add25 = add nsw i32 %i2, %i3
+ %arrayidx27 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv8
+ store i32 %add25, i32* %arrayidx27, align 4
+ br label %for.inc14
+
+for.end30: ; preds = %for.cond4
+ %wide.trip.count19 = zext i32 %smax to i64
+ br label %for.cond32
+
+for.cond32: ; preds = %for.inc42, %for.end30
+ %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc42 ], [ 0, %for.end30 ]
+ %exitcond20 = icmp ne i64 %indvars.iv16, %wide.trip.count19
+ br i1 %exitcond20, label %for.body34, label %for.end44
+
+for.body34: ; preds = %for.cond32
+ %arrayidx36 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv16
+ %i4 = load i32, i32* %arrayidx36, align 4
+ %arrayidx38 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv16
+ %i5 = load i32, i32* %arrayidx38, align 4
+ %add39 = add nsw i32 %i4, %i5
+ %arrayidx41 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv16
+ store i32 %add39, i32* %arrayidx41, align 4
+ br label %for.inc42
+
+for.inc42: ; preds = %for.body34
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ br label %for.cond32, !llvm.loop !7
+
+for.end44: ; preds = %for.cond32
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
+!7 = distinct !{!7, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse.c b/tests/loop_fuse/negative_loop_fuse.c
new file mode 100644
index 00000000000000..ca7a77bd3b3731
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse.c
@@ -0,0 +1,10 @@
+void init(int *a, int *b, int *c, int n) { /* negative LoopFusion test input: two adjacent loops that should stay separate */
+ for (int i = 3; i < n; i++) { /* first loop starts at 3 */
+ c[i] = i + i;
+ b[i] = i * i;
+ }
+
+ for (int i = 5; i < n; i++) { /* second loop starts at 5; the differing start values are presumably what makes the pass refuse to fuse */
+ a[i] = b[i] + c[i];
+ }
+}
\ No newline at end of file
diff --git a/tests/loop_fuse/negative_loop_fuse.ll b/tests/loop_fuse/negative_loop_fuse.ll
new file mode 100644
index 00000000000000..cf38ff8b6ff8e9
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse.ll
@@ -0,0 +1,103 @@
+; ModuleID = 'negative_loop_fuse.c'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %a.addr = alloca i32*, align 8
+ %b.addr = alloca i32*, align 8
+ %c.addr = alloca i32*, align 8
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %i3 = alloca i32, align 4
+ store i32* %a, i32** %a.addr, align 8
+ store i32* %b, i32** %b.addr, align 8
+ store i32* %c, i32** %c.addr, align 8
+ store i32 %n, i32* %n.addr, align 4
+ store i32 3, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32*, i32** %c.addr, align 8
+ %5 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %6 = load i32, i32* %i, align 4
+ %7 = load i32, i32* %i, align 4
+ %mul = mul nsw i32 %6, %7
+ %8 = load i32*, i32** %b.addr, align 8
+ %9 = load i32, i32* %i, align 4
+ %idxprom1 = sext i32 %9 to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %8, i64 %idxprom1
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %10 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ store i32 5, i32* %i3, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %11 = load i32, i32* %i3, align 4
+ %12 = load i32, i32* %n.addr, align 4
+ %cmp5 = icmp slt i32 %11, %12
+ br i1 %cmp5, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %13 = load i32*, i32** %b.addr, align 8
+ %14 = load i32, i32* %i3, align 4
+ %idxprom7 = sext i32 %14 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %13, i64 %idxprom7
+ %15 = load i32, i32* %arrayidx8, align 4
+ %16 = load i32*, i32** %c.addr, align 8
+ %17 = load i32, i32* %i3, align 4
+ %idxprom9 = sext i32 %17 to i64
+ %arrayidx10 = getelementptr inbounds i32, i32* %16, i64 %idxprom9
+ %18 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %15, %18
+ %19 = load i32*, i32** %a.addr, align 8
+ %20 = load i32, i32* %i3, align 4
+ %idxprom12 = sext i32 %20 to i64
+ %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %21 = load i32, i32* %i3, align 4
+ %inc15 = add nsw i32 %21, 1
+ store i32 %inc15, i32* %i3, align 4
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out.ll b/tests/loop_fuse/negative_loop_fuse_out.ll
new file mode 100644
index 00000000000000..b3fdb2fa9918a1
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse_out.ll
@@ -0,0 +1,76 @@
+; ModuleID = 'negative_loop_fuse.ll'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 3)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 3, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %smax7 = call i32 @llvm.smax.i32(i32 %n, i32 5)
+ %wide.trip.count8 = zext i32 %smax7 to i64
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 5, %for.end ]
+ %exitcond9 = icmp ne i64 %indvars.iv4, %wide.trip.count8
+ br i1 %exitcond9, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
diff --git a/tests/loop_fuse/negative_loop_fuse_out1.ll b/tests/loop_fuse/negative_loop_fuse_out1.ll
new file mode 100644
index 00000000000000..a63151ac6cda1d
--- /dev/null
+++ b/tests/loop_fuse/negative_loop_fuse_out1.ll
@@ -0,0 +1,76 @@
+; ModuleID = 'negative_loop_fuse_out.ll'
+source_filename = "negative_loop_fuse.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init(i32* noundef %a, i32* noundef %b, i32* noundef %c, i32 noundef %n) #0 {
+entry:
+ %smax = call i32 @llvm.smax.i32(i32 %n, i32 3)
+ %wide.trip.count = zext i32 %smax to i64
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 3, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = shl nuw nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+ %1 = trunc i64 %0 to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %2
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !4
+
+for.end: ; preds = %for.cond
+ %smax7 = call i32 @llvm.smax.i32(i32 %n, i32 5)
+ %wide.trip.count8 = zext i32 %smax7 to i64
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc14, %for.end
+ %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc14 ], [ 5, %for.end ]
+ %exitcond9 = icmp ne i64 %indvars.iv4, %wide.trip.count8
+ br i1 %exitcond9, label %for.body6, label %for.end16
+
+for.body6: ; preds = %for.cond4
+ %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv4
+ %i = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv4
+ %i1 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %i, %i1
+ %arrayidx13 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv4
+ store i32 %add11, i32* %arrayidx13, align 4
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body6
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ br label %for.cond4, !llvm.loop !6
+
+for.end16: ; preds = %for.cond4
+ ret void
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.smax.i32(i32, i32) #1
+
+attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 14.0.6 (https://github.com/shravankumar0811/llvm-project.git 47ee914ea16086c1958b93540ed2351bcdae7cdb)"}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.mustprogress"}
+!6 = distinct !{!6, !5}
>From 7000d87c7b48e8d6007ddf31724945a34e34e417 Mon Sep 17 00:00:00 2001
From: Shravan Kumar <shkumar at habana.ai>
Date: Thu, 7 Jul 2022 10:09:06 +0300
Subject: [PATCH 8/8] Adding Loop Fusion pass
---
mlir/examples/toy/Ch2/include/toy/Ops.td | 19 ++++++++++++++++++
mlir/examples/toy/Ch2/mlir/Dialect.cpp | 25 ++++++++++++++++++++++++
mlir/examples/toy/Ch2/mlir/MLIRGen.cpp | 11 +++++++++++
mlir/test/Examples/Toy/Ch2/codegen.toy | 2 +-
tests/CMakeLists.txt | 1 +
tests/toy/codegen.mlir | 16 +++++++++++++++
tests/toy/command.sh | 2 ++
7 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 tests/toy/codegen.mlir
create mode 100644 tests/toy/command.sh
diff --git a/mlir/examples/toy/Ch2/include/toy/Ops.td b/mlir/examples/toy/Ch2/include/toy/Ops.td
index eaec24c3ae5bb7..80f82778580621 100644
--- a/mlir/examples/toy/Ch2/include/toy/Ops.td
+++ b/mlir/examples/toy/Ch2/include/toy/Ops.td
@@ -246,4 +246,23 @@ def TransposeOp : Toy_Op<"transpose"> {
let verifier = [{ return ::verify(*this); }];
}
+def MatmulOp : Toy_Op<"matmul"> { // Defines the two-operand `toy.matmul` operation.
+ let summary = "matmul operation";
+
+ let arguments = (ins F64Tensor:$a , F64Tensor:$b ); // Two F64Tensor operands, named $a and $b.
+ let results = (outs F64Tensor); // A single F64Tensor result.
+
+ let assemblyFormat = [{
+ `(` $a `:` type($a) `,` $b `:` type($b) `)` attr-dict `to` type(results)
+ }];
+
+ // Allow building a MatmulOp from its two input operands.
+ let builders = [
+ OpBuilder<(ins "Value":$a, "Value":$b )>
+ ];
+
+ // Invoke a static verify method to verify this matmul operation.
+ let verifier = [{ return ::verify(*this); }];
+}
+
#endif // TOY_OPS
diff --git a/mlir/examples/toy/Ch2/mlir/Dialect.cpp b/mlir/examples/toy/Ch2/mlir/Dialect.cpp
index 278c857ea46816..3baa930740b39e 100644
--- a/mlir/examples/toy/Ch2/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch2/mlir/Dialect.cpp
@@ -248,6 +248,31 @@ static mlir::LogicalResult verify(TransposeOp op) {
return mlir::success();
}
+//===----------------------------------------------------------------------===//
+// MatmulOp
+
+void MatmulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value value1, mlir::Value value2) {
+  // Operands in (a, b) order; the result is always an unranked f64 tensor.
+  state.addOperands({value1, value2});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+// Verifies matmul shapes, (MxK) * (KxN) -> (MxN), when every type is ranked.
+static mlir::LogicalResult verify(MatmulOp op) {
+  auto lhs = op.getOperand(0).getType().dyn_cast<RankedTensorType>();
+  auto rhs = op.getOperand(1).getType().dyn_cast<RankedTensorType>();
+  auto res = op.getType().dyn_cast<RankedTensorType>();
+  if (!lhs || !rhs || !res)
+    return mlir::success(); // Unranked: there is no static shape to check.
+  auto a = lhs.getShape(), b = rhs.getShape(), out = res.getShape();
+  if (a.size() != 2 || b.size() != 2 || out.size() != 2 || a[1] != b[0] ||
+      out[0] != a[0] || out[1] != b[1])
+    return op.emitError()
+           << "expected result shape to be a matmul of the input";
+  return mlir::success();
+}
+
//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
diff --git a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp
index b7b573672fc74b..f95f7185abab6f 100644
--- a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp
+++ b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp
@@ -323,6 +323,17 @@ class MLIRGenImpl {
return builder.create<TransposeOp>(location, operands[0]);
}
+    // Builtin calls have their custom operation, meaning this is a
+    // straightforward emission; toy.matmul takes exactly two operands.
+    if (callee == "matmul") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: toy.matmul "
+                            "expects exactly two arguments");
+        return nullptr;
+      }
+      return builder.create<MatmulOp>(location, operands[0], operands[1]);
+    }
+
// Otherwise this is a call to a user-defined function. Calls to
// user-defined functions are mapped to a custom call that takes the callee
// name as an attribute.
diff --git a/mlir/test/Examples/Toy/Ch2/codegen.toy b/mlir/test/Examples/Toy/Ch2/codegen.toy
index ea1708e6fee18d..6ee9b181c5710d 100644
--- a/mlir/test/Examples/Toy/Ch2/codegen.toy
+++ b/mlir/test/Examples/Toy/Ch2/codegen.toy
@@ -2,7 +2,7 @@
# User defined generic function that operates on unknown shaped arguments
def multiply_transpose(a, b) {
- return transpose(a) * transpose(b);
+ return matmul(a,b);
}
def main() {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2321ce7a8e9ce7..dd44aafff57d7e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,3 +2,4 @@ add_subdirectory(cfcss)
add_subdirectory(assignment1)
add_subdirectory(scev)
add_subdirectory(loop_fuse)
+add_subdirectory(toy)
diff --git a/tests/toy/codegen.mlir b/tests/toy/codegen.mlir
new file mode 100644
index 00000000000000..58045981ac1742
--- /dev/null
+++ b/tests/toy/codegen.mlir
@@ -0,0 +1,16 @@
+module {
+ func @multiply_transpose(%arg0: tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":4:1), %arg1: tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":4:1)) -> tensor<*xf64> {
+ %0 = toy.matmul(%arg0 : tensor<*xf64>, %arg1 : tensor<*xf64>) to tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":5:10)
+ toy.return %0 : tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":5:3)
+ } loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":4:1)
+ func @main() {
+ %0 = toy.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":9:17)
+ %1 = toy.reshape(%0 : tensor<2x3xf64>) to tensor<2x3xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":9:3)
+ %2 = toy.constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00]> : tensor<6xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":10:17)
+ %3 = toy.reshape(%2 : tensor<6xf64>) to tensor<2x3xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":10:3)
+ %4 = toy.generic_call @multiply_transpose(%1, %3) : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":11:11)
+ %5 = toy.generic_call @multiply_transpose(%3, %1) : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":12:11)
+ toy.print %5 : tensor<*xf64> loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":13:3)
+ toy.return loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":8:1)
+ } loc("/home/shkumar/LLVM/llvm-project/mlir/test/Examples/Toy/Ch2/codegen.toy":8:1)
+} loc(unknown)
diff --git a/tests/toy/command.sh b/tests/toy/command.sh
new file mode 100644
index 00000000000000..109563b08ae60d
--- /dev/null
+++ b/tests/toy/command.sh
@@ -0,0 +1,2 @@
+toyc-ch2 "$(dirname "$0")/../../mlir/test/Examples/Toy/Ch2/codegen.toy" -emit=mlir -mlir-print-debuginfo 2> codegen.mlir # input path resolved relative to this script; the MLIR dump is captured from stderr
+toyc-ch2 codegen.mlir -emit=mlir # round-trip: feed the captured MLIR back into toyc-ch2
More information about the llvm-branch-commits
mailing list