[llvm] [llvm][CodeGen] Fix the empty interval issue in Window Scheduler(#128714) (PR #129204)
Hua Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 02:27:51 PDT 2025
https://github.com/huaatian updated https://github.com/llvm/llvm-project/pull/129204
>From 40c675fe63a45be5462c14ad8a2c984d298dcb9c Mon Sep 17 00:00:00 2001
From: akiratian <akiratian at tencent.com>
Date: Fri, 28 Feb 2025 10:13:55 +0800
Subject: [PATCH 1/3] [llvm][CodeGen] Fix the empty interval issue in Window
Scheduler(#128714)
The live intervals of registers newly generated in ModuloScheduleExpander
are empty. This can cause a crash in some corner cases. This patch
recalculates the live intervals of these registers.
---
llvm/include/llvm/CodeGen/ModuloSchedule.h | 4 +
llvm/lib/CodeGen/ModuloSchedule.cpp | 65 ++++++--
.../swp-ws-live-intervals-issue128714.mir | 157 ++++++++++++++++++
3 files changed, 213 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index 49dc746d3ee35..4005bf1a2d5fa 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -188,6 +188,9 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;
+ /// Record newly created registers with empty live intervals.
+ SmallVector<Register> EmptyIntervalRegs;
+
void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -211,6 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
+ void recalcEmptyIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index c6d1a416407f2..cb71e9680766a 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = CI;
}
@@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &MI;
}
@@ -179,6 +181,8 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
+ recalcEmptyIntervals();
+
delete[] VRMap;
delete[] VRMapPhi;
}
@@ -226,6 +230,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &*BBI;
}
}
@@ -303,6 +308,7 @@ void ModuloScheduleExpander::generateEpilog(
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = In;
}
}
@@ -344,13 +350,16 @@ void ModuloScheduleExpander::generateEpilog(
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
- LiveIntervals &LIS) {
+ LiveIntervals &LIS,
+ SmallVector<Register> &EmptyIntervalRegs) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- if (!LIS.hasInterval(ToReg))
+ if (!LIS.hasInterval(ToReg)) {
LIS.createEmptyInterval(ToReg);
+ EmptyIntervalRegs.push_back(ToReg);
+ }
}
/// Return true if the register has a use that occurs outside the
@@ -542,7 +551,8 @@ void ModuloScheduleExpander::generateExistingPhis(
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS,
+ EmptyIntervalRegs);
continue;
}
}
@@ -560,6 +570,7 @@ void ModuloScheduleExpander::generateExistingPhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
+ LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
@@ -582,7 +593,7 @@ void ModuloScheduleExpander::generateExistingPhis(
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
@@ -602,7 +613,8 @@ void ModuloScheduleExpander::generateExistingPhis(
if (NumStages == 0 && IsLast) {
auto It = VRMap[CurStageNum].find(LoopVal);
if (It != VRMap[CurStageNum].end())
- replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
+ replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS,
+ EmptyIntervalRegs);
}
}
}
@@ -702,6 +714,7 @@ void ModuloScheduleExpander::generatePhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
+ LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
@@ -722,7 +735,7 @@ void ModuloScheduleExpander::generatePhis(
NewReg);
}
if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
}
}
}
@@ -831,9 +844,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// We split the lifetime when we find the first use.
if (!SplitReg) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
- BuildMI(*KernelBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), SplitReg)
- .addReg(Def);
+ MachineInstr *newCopy =
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ LIS.InsertMachineInstrInMaps(*newCopy);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
@@ -901,6 +916,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
+ for (auto &MI : *LastEpi)
+ LIS.RemoveMachineInstrFromMaps(MI);
LastEpi->clear();
LastEpi->eraseFromParent();
}
@@ -908,6 +925,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
LoopInfo->disposed(&LIS);
NewKernel = nullptr;
}
+ for (auto &MI : *LastPro)
+ LIS.RemoveMachineInstrFromMaps(MI);
LastPro->clear();
LastPro->eraseFromParent();
} else {
@@ -928,6 +947,25 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}
+/// Some new registers are generated during the kernel expansion. We recalculate
+/// the live intervals of these registers after the expansion.
+void ModuloScheduleExpander::recalcEmptyIntervals() {
+ // The interval can be computed if the register's non-debug users have
+ // slot indexes.
+ auto CanRecalculateInterval = [this](unsigned Reg) -> bool {
+ for (auto &Opnd : this->MRI.reg_nodbg_operands(Reg))
+ if (this->LIS.isNotInMIMap(*Opnd.getParent()))
+ return false;
+ return true;
+ };
+ for (auto Reg : EmptyIntervalRegs)
+ if (CanRecalculateInterval(Reg)) {
+ LIS.removeInterval(Reg);
+ LIS.createAndComputeVirtRegInterval(Reg);
+ }
+ EmptyIntervalRegs.clear();
+}
+
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1049,7 +1087,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
- replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
@@ -1198,10 +1236,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
- SplitReg)
- .addReg(ReplaceReg);
+ MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(ReplaceReg);
UseOp.setReg(SplitReg);
+ LIS.InsertMachineInstrInMaps(*newCopy);
}
}
}
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
new file mode 100644
index 0000000000000..ef52ff11af9c8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
@@ -0,0 +1,157 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s
+
+--- |
+ define void @test_swp_ws_live_intervals(i32 %.pre) {
+ entry:
+ %cgep9 = bitcast ptr null to ptr
+ br label %for.body147
+
+ for.body147: ; preds = %for.body170, %entry
+ %add11.i526 = or i32 %.pre, 1
+ br label %for.body158
+
+ for.body158: ; preds = %for.body158, %for.body147
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
+ %add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
+ %and8.i534 = and i32 %add11.i536602603, 1
+ %cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
+ store i32 0, ptr %cgep7, align 4
+ %lsr.iv.next = add nsw i32 %lsr.iv, 1
+ %cmp157.3 = icmp ult i32 %lsr.iv.next, 510
+ br i1 %cmp157.3, label %for.body158, label %for.body170
+
+ for.body170: ; preds = %for.body170, %for.body158
+ %lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
+ %lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
+ %add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
+ %cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
+ store i32 0, ptr %cgep5, align 8
+ %sub.i547.1 = add i32 %add11.i556606607, 1
+ %and.i548.1 = and i32 %sub.i547.1, 1
+ %cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
+ %0 = load i32, ptr %cgep8, align 4
+ store i32 %0, ptr %lsr.iv3, align 4
+ %lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
+ %cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
+ %cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
+ br i1 %cmp169.1, label %for.body170, label %for.body147
+ }
+
+...
+---
+name: test_swp_ws_live_intervals
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_swp_ws_live_intervals
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
+ ; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
+ ; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
+ ; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
+ ; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
+ ; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ ; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
+ ; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
+ ; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
+ ; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
+ ; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
+ ; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
+ ; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ ; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
+ ; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
+ ; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ ; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
+ ; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
+ ; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
+ ; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
+ ; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
+ ; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
+ ; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
+ ; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ ; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0
+
+ %0:intregs = COPY $r0
+ %1:intregs = S2_setbit_i %0, 0
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+ bb.2:
+ successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+
+ %2:intregs = PHI %1, %bb.1, %3, %bb.2
+ %4:intregs = A2_andir %2, 1
+ %5:intregs = S2_asl_i_r %4, 2
+ S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
+ %6:intregs = A2_tfrsi 1
+ %7:intregs = A2_tfrsi 4
+ %3:intregs = A2_tfrsi 0
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.3, implicit-def dead $pc
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ J2_jump %bb.4, implicit-def $pc
+
+ bb.4:
+ successors: %bb.4(0x7c000000), %bb.1(0x04000000)
+
+ %8:intregs = PHI %7, %bb.3, %9, %bb.4
+ %10:intregs = PHI %6, %bb.3, %11, %bb.4
+ %11:intregs = A2_tfrsi 0
+ S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
+ %12:intregs = A2_addi %10, 1
+ %13:intregs = S2_insert %5, %12, 1, 2
+ %14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
+ S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
+ %9:intregs = A2_addi %8, 2
+ ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.1, implicit-def dead $pc
+
+...
>From a5a28f941cc5de36df03006e0a195485d25716c5 Mon Sep 17 00:00:00 2001
From: akiratian <akiratian at tencent.com>
Date: Fri, 28 Feb 2025 17:47:15 +0800
Subject: [PATCH 2/3] [llvm][CodeGen] Modifications made based on review
comments 1
---
llvm/include/llvm/CodeGen/ModuloSchedule.h | 6 +--
llvm/lib/CodeGen/ModuloSchedule.cpp | 46 +++++++++++-----------
2 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index 4005bf1a2d5fa..b6000ba05d882 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -188,8 +188,8 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;
- /// Record newly created registers with empty live intervals.
- SmallVector<Register> EmptyIntervalRegs;
+ /// Record the registers that need to compute live intervals.
+ SmallVector<Register> NoIntervalRegs;
void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
@@ -214,7 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
- void recalcEmptyIntervals();
+ void calculateIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index cb71e9680766a..24fa9747a1e77 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -181,7 +181,9 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
- recalcEmptyIntervals();
+ // The intervals of newly created virtual registers are calculated after the
+ // kernel expansion.
+ calculateIntervals();
delete[] VRMap;
delete[] VRMapPhi;
@@ -351,15 +353,15 @@ static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS,
- SmallVector<Register> &EmptyIntervalRegs) {
+ SmallVector<Register> &NoIntervalRegs) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- if (!LIS.hasInterval(ToReg)) {
- LIS.createEmptyInterval(ToReg);
- EmptyIntervalRegs.push_back(ToReg);
- }
+ // The interval will be calculated after the kernel expansion in
+ // calculateIntervals().
+ if (!LIS.hasInterval(ToReg))
+ NoIntervalRegs.push_back(ToReg);
}
/// Return true if the register has a use that occurs outside the
@@ -552,7 +554,7 @@ void ModuloScheduleExpander::generateExistingPhis(
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS,
- EmptyIntervalRegs);
+ NoIntervalRegs);
continue;
}
}
@@ -593,7 +595,7 @@ void ModuloScheduleExpander::generateExistingPhis(
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, NoIntervalRegs);
// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
@@ -613,8 +615,7 @@ void ModuloScheduleExpander::generateExistingPhis(
if (NumStages == 0 && IsLast) {
auto It = VRMap[CurStageNum].find(LoopVal);
if (It != VRMap[CurStageNum].end())
- replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS,
- EmptyIntervalRegs);
+ replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS, NoIntervalRegs);
}
}
}
@@ -735,7 +736,7 @@ void ModuloScheduleExpander::generatePhis(
NewReg);
}
if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, NoIntervalRegs);
}
}
}
@@ -947,23 +948,24 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}
-/// Some new registers are generated during the kernel expansion. We recalculate
-/// the live intervals of these registers after the expansion.
-void ModuloScheduleExpander::recalcEmptyIntervals() {
- // The interval can be computed if the register's non-debug users have
+/// Some registers are generated during the kernel expansion. We calculate the
+/// live intervals of these registers after the expansion.
+void ModuloScheduleExpander::calculateIntervals() {
+ // The interval can be computed if all the register's non-debug users have
// slot indexes.
- auto CanRecalculateInterval = [this](unsigned Reg) -> bool {
+ auto CanCalculateInterval = [this](Register Reg) -> bool {
for (auto &Opnd : this->MRI.reg_nodbg_operands(Reg))
if (this->LIS.isNotInMIMap(*Opnd.getParent()))
return false;
return true;
};
- for (auto Reg : EmptyIntervalRegs)
- if (CanRecalculateInterval(Reg)) {
- LIS.removeInterval(Reg);
+ for (auto Reg : NoIntervalRegs) {
+ if (CanCalculateInterval(Reg))
LIS.createAndComputeVirtRegInterval(Reg);
- }
- EmptyIntervalRegs.clear();
+ else
+ LIS.createEmptyInterval(Reg);
+ }
+ NoIntervalRegs.clear();
}
/// Return true if we can compute the amount the instruction changes
@@ -1087,7 +1089,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
- replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS, NoIntervalRegs);
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
>From 10934db6a5d09332197a64c5f0f11abf3ab2985c Mon Sep 17 00:00:00 2001
From: akiratian <akiratian at tencent.com>
Date: Mon, 10 Mar 2025 17:27:09 +0800
Subject: [PATCH 3/3] [llvm][CodeGen] Modifications made based on review
comments 2
---
llvm/lib/CodeGen/ModuloSchedule.cpp | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 24fa9747a1e77..08d4593b075b7 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -951,20 +951,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
/// Some registers are generated during the kernel expansion. We calculate the
/// live intervals of these registers after the expansion.
void ModuloScheduleExpander::calculateIntervals() {
- // The interval can be computed if all the register's non-debug users have
- // slot indexes.
- auto CanCalculateInterval = [this](Register Reg) -> bool {
- for (auto &Opnd : this->MRI.reg_nodbg_operands(Reg))
- if (this->LIS.isNotInMIMap(*Opnd.getParent()))
- return false;
- return true;
- };
- for (auto Reg : NoIntervalRegs) {
- if (CanCalculateInterval(Reg))
- LIS.createAndComputeVirtRegInterval(Reg);
- else
- LIS.createEmptyInterval(Reg);
- }
+ for (auto Reg : NoIntervalRegs)
+ LIS.createAndComputeVirtRegInterval(Reg);
NoIntervalRegs.clear();
}
More information about the llvm-commits
mailing list