[llvm] b09b9ac - [llvm][CodeGen] Fix the empty interval issue in Window Scheduler (#129204)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 16 23:28:50 PDT 2025
Author: Hua Tian
Date: 2025-03-17T14:28:47+08:00
New Revision: b09b9ac1081d19c8021df8e55e96cd1325f0eed0
URL: https://github.com/llvm/llvm-project/commit/b09b9ac1081d19c8021df8e55e96cd1325f0eed0
DIFF: https://github.com/llvm/llvm-project/commit/b09b9ac1081d19c8021df8e55e96cd1325f0eed0.diff
LOG: [llvm][CodeGen] Fix the empty interval issue in Window Scheduler (#129204)
The interval of newly generated reg in ModuloScheduleExpander is empty.
This will cause crash at some corner case. This patch recalculate the
live intervals of these regs.
Added:
llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
Modified:
llvm/include/llvm/CodeGen/ModuloSchedule.h
llvm/lib/CodeGen/ModuloSchedule.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index 49dc746d3ee35..b6000ba05d882 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -188,6 +188,9 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;
+ /// Record the registers that need to compute live intervals.
+ SmallVector<Register> NoIntervalRegs;
+
void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -211,6 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
+ void calculateIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index bff0b7af989bc..d208a62a99372 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = CI;
}
@@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &MI;
}
@@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
+ // The intervals of newly created virtual registers are calculated after the
+ // kernel expansion.
+ calculateIntervals();
+
delete[] VRMap;
delete[] VRMapPhi;
}
@@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &*BBI;
}
}
@@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
+ LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = In;
}
}
@@ -343,14 +351,11 @@ void ModuloScheduleExpander::generateEpilog(
/// basic block with ToReg.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS) {
+ MachineRegisterInfo &MRI) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- if (!LIS.hasInterval(ToReg))
- LIS.createEmptyInterval(ToReg);
}
/// Return true if the register has a use that occurs outside the
@@ -544,8 +549,10 @@ void ModuloScheduleExpander::generateExistingPhis(
if (VRMap[LastStageNum - np - 1].count(LoopVal))
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ if (IsLast && np == NumPhis - 1) {
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
+ NoIntervalRegs.push_back(NewReg);
+ }
continue;
}
}
@@ -563,6 +570,7 @@ void ModuloScheduleExpander::generateExistingPhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
+ LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
@@ -584,8 +592,10 @@ void ModuloScheduleExpander::generateExistingPhis(
// Check if we need to rename any uses that occurs after the loop. The
// register to replace depends on whether the Phi is scheduled in the
// epilog.
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ if (IsLast && np == NumPhis - 1) {
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
+ NoIntervalRegs.push_back(NewReg);
+ }
// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
@@ -603,9 +613,12 @@ void ModuloScheduleExpander::generateExistingPhis(
// Check if we need to rename a Phi that has been eliminated due to
// scheduling.
if (NumStages == 0 && IsLast) {
- auto It = VRMap[CurStageNum].find(LoopVal);
- if (It != VRMap[CurStageNum].end())
- replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
+ auto &CurStageMap = VRMap[CurStageNum];
+ auto It = CurStageMap.find(LoopVal);
+ if (It != CurStageMap.end()) {
+ replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
+ NoIntervalRegs.push_back(It->second);
+ }
}
}
}
@@ -705,6 +718,7 @@ void ModuloScheduleExpander::generatePhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
+ LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
@@ -724,8 +738,10 @@ void ModuloScheduleExpander::generatePhis(
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
}
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ if (IsLast && np == NumPhis - 1) {
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
+ NoIntervalRegs.push_back(NewReg);
+ }
}
}
}
@@ -834,9 +850,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// We split the lifetime when we find the first use.
if (!SplitReg) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
- BuildMI(*KernelBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), SplitReg)
- .addReg(Def);
+ MachineInstr *newCopy =
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ LIS.InsertMachineInstrInMaps(*newCopy);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
@@ -904,6 +922,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
+ for (auto &MI : *LastEpi)
+ LIS.RemoveMachineInstrFromMaps(MI);
LastEpi->clear();
LastEpi->eraseFromParent();
}
@@ -911,6 +931,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
LoopInfo->disposed(&LIS);
NewKernel = nullptr;
}
+ for (auto &MI : *LastPro)
+ LIS.RemoveMachineInstrFromMaps(MI);
LastPro->clear();
LastPro->eraseFromParent();
} else {
@@ -931,6 +953,14 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}
+/// Some registers are generated during the kernel expansion. We calculate the
+/// live intervals of these registers after the expansion.
+void ModuloScheduleExpander::calculateIntervals() {
+ for (Register Reg : NoIntervalRegs)
+ LIS.createAndComputeVirtRegInterval(Reg);
+ NoIntervalRegs.clear();
+}
+
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1051,8 +1081,10 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
Register NewReg = MRI.createVirtualRegister(RC);
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
- if (LastDef)
- replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ if (LastDef) {
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
+ NoIntervalRegs.push_back(NewReg);
+ }
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
@@ -1201,10 +1233,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
- SplitReg)
- .addReg(ReplaceReg);
+ MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(ReplaceReg);
UseOp.setReg(SplitReg);
+ LIS.InsertMachineInstrInMaps(*newCopy);
}
}
}
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
new file mode 100644
index 0000000000000..ef52ff11af9c8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
@@ -0,0 +1,157 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s
+
+--- |
+ define void @test_swp_ws_live_intervals(i32 %.pre) {
+ entry:
+ %cgep9 = bitcast ptr null to ptr
+ br label %for.body147
+
+ for.body147: ; preds = %for.body170, %entry
+ %add11.i526 = or i32 %.pre, 1
+ br label %for.body158
+
+ for.body158: ; preds = %for.body158, %for.body147
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
+ %add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
+ %and8.i534 = and i32 %add11.i536602603, 1
+ %cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
+ store i32 0, ptr %cgep7, align 4
+ %lsr.iv.next = add nsw i32 %lsr.iv, 1
+ %cmp157.3 = icmp ult i32 %lsr.iv.next, 510
+ br i1 %cmp157.3, label %for.body158, label %for.body170
+
+ for.body170: ; preds = %for.body170, %for.body158
+ %lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
+ %lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
+ %add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
+ %cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
+ store i32 0, ptr %cgep5, align 8
+ %sub.i547.1 = add i32 %add11.i556606607, 1
+ %and.i548.1 = and i32 %sub.i547.1, 1
+ %cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
+ %0 = load i32, ptr %cgep8, align 4
+ store i32 %0, ptr %lsr.iv3, align 4
+ %lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
+ %cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
+ %cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
+ br i1 %cmp169.1, label %for.body170, label %for.body147
+ }
+
+...
+---
+name: test_swp_ws_live_intervals
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_swp_ws_live_intervals
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
+ ; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
+ ; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
+ ; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
+ ; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
+ ; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ ; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
+ ; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
+ ; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
+ ; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
+ ; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
+ ; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
+ ; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ ; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
+ ; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
+ ; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ ; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
+ ; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
+ ; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
+ ; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
+ ; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
+ ; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
+ ; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
+ ; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
+ ; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ ; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0
+
+ %0:intregs = COPY $r0
+ %1:intregs = S2_setbit_i %0, 0
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+ bb.2:
+ successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+
+ %2:intregs = PHI %1, %bb.1, %3, %bb.2
+ %4:intregs = A2_andir %2, 1
+ %5:intregs = S2_asl_i_r %4, 2
+ S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
+ %6:intregs = A2_tfrsi 1
+ %7:intregs = A2_tfrsi 4
+ %3:intregs = A2_tfrsi 0
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.3, implicit-def dead $pc
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ J2_jump %bb.4, implicit-def $pc
+
+ bb.4:
+ successors: %bb.4(0x7c000000), %bb.1(0x04000000)
+
+ %8:intregs = PHI %7, %bb.3, %9, %bb.4
+ %10:intregs = PHI %6, %bb.3, %11, %bb.4
+ %11:intregs = A2_tfrsi 0
+ S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
+ %12:intregs = A2_addi %10, 1
+ %13:intregs = S2_insert %5, %12, 1, 2
+ %14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
+ S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
+ %9:intregs = A2_addi %8, 2
+ ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.1, implicit-def dead $pc
+
+...
More information about the llvm-commits
mailing list