[llvm] 0e4378c - [LoongArch] Add emergency spill slot for CFR spill/reload
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 22:53:35 PST 2022
Author: wanglei
Date: 2022-11-19T14:35:31+08:00
New Revision: 0e4378c55e318869ea6404f2351f143e6d17fbfe
URL: https://github.com/llvm/llvm-project/commit/0e4378c55e318869ea6404f2351f143e6d17fbfe
DIFF: https://github.com/llvm/llvm-project/commit/0e4378c55e318869ea6404f2351f143e6d17fbfe.diff
LOG: [LoongArch] Add emergency spill slot for CFR spill/reload
When all registers have been allocated and CFR needs to be saved on the
stack, an emergency spill slot is required. Because CFR's spill and
reload require a general purpose register to transfer.
The attached test case was bugpoint-reduced down from
`MultiSource/Benchmarks/mafft/Lalignmm.c` in the test-suite.
Without this patch, llc will crash and report the following errors:
```
LLVM ERROR: Error while trying to spill R4 from class GPR: Cannot scavenge register without an emergency spill slot!
```
Reviewed By: SixWeining
Differential Revision: https://reviews.llvm.org/D138007
Added:
llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index a5c84380e77f4..61c2b76879e39 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -14,6 +14,7 @@
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,6 +128,16 @@ static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII,
return FuncSize;
}
+static bool needScavSlotForCFR(MachineFunction &MF) {
+ if (!MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
+ return false;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (MI.getOpcode() == LoongArch::PseudoST_CFR)
+ return true;
+ return false;
+}
+
void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
@@ -136,26 +147,33 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
MF.getInfo<LoongArchMachineFunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned ScavSlotsNum = 0;
+
// Far branches beyond 27-bit offset require a spill slot for scratch register.
- if (!isInt<27>(estimateFunctionSizeInBytes(TII, MF))) {
+ bool IsLargeFunction = !isInt<27>(estimateFunctionSizeInBytes(TII, MF));
+ if (IsLargeFunction)
+ ScavSlotsNum = 1;
+
+ // estimateStackSize has been observed to under-estimate the final stack
+ // size, so give ourselves wiggle-room by checking for stack size
+ // representable an 11-bit signed field rather than 12-bits.
+ if (!isInt<11>(MFI.estimateStackSize(MF)))
+ ScavSlotsNum = std::max(ScavSlotsNum, 1u);
+
+ // For CFR spill.
+ if (needScavSlotForCFR(MF))
+ ++ScavSlotsNum;
+
+ // Create emergency spill slots.
+ for (unsigned i = 0; i < ScavSlotsNum; ++i) {
int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC),
false);
RS->addScavengingFrameIndex(FI);
- if (LAFI->getBranchRelaxationSpillFrameIndex() == -1)
+ if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1)
LAFI->setBranchRelaxationSpillFrameIndex(FI);
+ LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
+ << ") as the emergency spill slot.\n");
}
- // estimateStackSize has been observed to under-estimate the final stack
- // size, so give ourselves wiggle-room by checking for stack size
- // representable an 11-bit signed field rather than 12-bits.
- if (isInt<11>(MFI.estimateStackSize(MF)))
- return;
-
- // Create an emergency spill slot.
- int FI =
- MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false);
- RS->addScavengingFrameIndex(FI);
- LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
- << ") as the emergency spill slot.\n");
}
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
diff --git a/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll
new file mode 100644
index 0000000000000..4a9189c97e417
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll
@@ -0,0 +1,127 @@
+; RUN: llc --mtriple=loongarch64 -mattr=+d %s -o /dev/null
+
+; ModuleID = 'bugpoint-reduced-simplifycfg.bc'
+source_filename = "test-suite-src/MultiSource/Benchmarks/mafft/Lalignmm.c"
+
+define float @Lalignmm_hmout(ptr %seq1, ptr %eff1, i32 %icyc) {
+entry:
+ %call4 = tail call i64 @strlen(ptr dereferenceable(1) poison)
+ %conv5 = trunc i64 %call4 to i32
+ %call7 = tail call i64 @strlen(ptr dereferenceable(1) poison)
+ %call20 = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call22 = tail call ptr @AllocateFloatVec(i32 signext poison)
+ tail call void @st_OpeningGapCount(ptr poison, i32 signext %icyc, ptr %seq1, ptr %eff1, i32 signext %conv5)
+ %sub110 = add nsw i32 %conv5, -1
+ %sub111 = add nsw i32 0, -1
+ br i1 poison, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i: ; preds = %entry
+ %sext294 = shl i64 %call4, 32
+ %conv23.i = ashr exact i64 %sext294, 32
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %for.cond.preheader.i
+ %call.i = tail call ptr @strncpy(ptr poison, ptr poison, i64 %conv23.i)
+ br label %for.body.i
+
+if.end.i: ; preds = %entry
+ %call82.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call84.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call86.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call88.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call90.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call92.i = tail call ptr @AllocateIntVec(i32 signext poison)
+ %call94.i = tail call ptr @AllocateIntVec(i32 signext poison)
+ %call104.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call108.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+ %call110.i = tail call ptr @AllocateIntVec(i32 signext poison)
+ %idxprom220.i = sext i32 %sub111 to i64
+ %mpjpt.018.i = getelementptr inbounds i32, ptr %call110.i, i64 1
+ %arrayidx329.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom220.i
+ %idxprom332.i = and i64 %call7, 4294967295
+ %wide.trip.count130.i = zext i32 poison to i64
+ %0 = add nsw i64 1, -1
+ %arrayidx239.i = getelementptr inbounds float, ptr %call104.i, i64 1
+ %1 = load float, ptr %arrayidx239.i, align 4
+ store float %1, ptr %call84.i, align 4
+ %curpt.017.i = getelementptr inbounds float, ptr %call84.i, i64 1
+ %arrayidx279.i = getelementptr inbounds float, ptr %call20, i64 %0
+ %2 = load ptr, ptr poison, align 8
+ %3 = load ptr, ptr null, align 8
+ %4 = trunc i64 %0 to i32
+ br label %for.body260.us.i
+
+for.body260.us.i: ; preds = %if.end292.us.i, %if.end.i
+ %indvars.iv132.i = phi i64 [ %indvars.iv.next133.i, %if.end292.us.i ], [ 1, %if.end.i ]
+ %mpjpt.026.us.i = phi ptr [ poison, %if.end292.us.i ], [ %mpjpt.018.i, %if.end.i ]
+ %curpt.025.us.i = phi ptr [ %curpt.0.us.i, %if.end292.us.i ], [ %curpt.017.i, %if.end.i ]
+ %prept.022.us.i = phi ptr [ %incdec.ptr316.us.i, %if.end292.us.i ], [ %call82.i, %if.end.i ]
+ %mi.021.us.i = phi float [ %mi.1.us.i, %if.end292.us.i ], [ poison, %if.end.i ]
+ %5 = load float, ptr %prept.022.us.i, align 4
+ %6 = add nsw i64 %indvars.iv132.i, -1
+ %arrayidx263.us.i = getelementptr inbounds float, ptr %call22, i64 %6
+ %7 = load float, ptr %arrayidx263.us.i, align 4
+ %add264.us.i = fadd float %mi.021.us.i, %7
+ %cmp265.us.i = fcmp ogt float %add264.us.i, %5
+ %wm.0.us.i = select i1 %cmp265.us.i, float %add264.us.i, float %5
+ %arrayidx270.us.i = getelementptr inbounds float, ptr poison, i64 %indvars.iv132.i
+ %cmp272.us.i = fcmp ult float 0.000000e+00, %mi.021.us.i
+ %mi.1.us.i = select i1 %cmp272.us.i, float %mi.021.us.i, float 0.000000e+00
+ %8 = trunc i64 %6 to i32
+ %mpi.1.us.i = select i1 %cmp272.us.i, i32 0, i32 %8
+ %9 = load float, ptr %arrayidx279.i, align 4
+ %add280.us.i = fadd float 0.000000e+00, %9
+ %cmp281.us.i = fcmp ogt float %add280.us.i, %wm.0.us.i
+ %wm.1.us.i = select i1 %cmp281.us.i, float %add280.us.i, float %wm.0.us.i
+ %cmp288.us.i = fcmp ult float poison, 0.000000e+00
+ br i1 %cmp288.us.i, label %if.end292.us.i, label %if.then290.us.i
+
+if.then290.us.i: ; preds = %for.body260.us.i
+ store i32 %4, ptr %mpjpt.026.us.i, align 4
+ br label %if.end292.us.i
+
+if.end292.us.i: ; preds = %if.then290.us.i, %for.body260.us.i
+ %10 = phi i32 [ %4, %if.then290.us.i ], [ poison, %for.body260.us.i ]
+ %add293.us.i = fadd float %wm.1.us.i, 0.000000e+00
+ %arrayidx297.us.i = getelementptr inbounds float, ptr %2, i64 %indvars.iv132.i
+ store float %add293.us.i, ptr %arrayidx297.us.i, align 4
+ %arrayidx306.us.i = getelementptr inbounds i32, ptr %call94.i, i64 %indvars.iv132.i
+ store i32 %10, ptr %arrayidx306.us.i, align 4
+ %arrayidx308.us.i = getelementptr inbounds i32, ptr %call92.i, i64 %indvars.iv132.i
+ store i32 %mpi.1.us.i, ptr %arrayidx308.us.i, align 4
+ %11 = load float, ptr %curpt.025.us.i, align 4
+ %arrayidx310.us.i = getelementptr inbounds float, ptr %call86.i, i64 %indvars.iv132.i
+ store float %11, ptr %arrayidx310.us.i, align 4
+ %arrayidx312.us.i = getelementptr inbounds float, ptr %call90.i, i64 %indvars.iv132.i
+ store float 0.000000e+00, ptr %arrayidx312.us.i, align 4
+ %arrayidx314.us.i = getelementptr inbounds float, ptr %call88.i, i64 %indvars.iv132.i
+ store float %mi.1.us.i, ptr %arrayidx314.us.i, align 4
+ %incdec.ptr316.us.i = getelementptr inbounds float, ptr %prept.022.us.i, i64 1
+ %indvars.iv.next133.i = add nuw nsw i64 %indvars.iv132.i, 1
+ %curpt.0.us.i = getelementptr inbounds float, ptr %curpt.025.us.i, i64 1
+ %exitcond137.not.i = icmp eq i64 %indvars.iv.next133.i, %wide.trip.count130.i
+ br i1 %exitcond137.not.i, label %for.end321.i, label %for.body260.us.i
+
+for.end321.i: ; preds = %if.end292.us.i
+ %12 = load float, ptr %arrayidx329.i, align 4
+ %arrayidx333.i = getelementptr inbounds float, ptr %3, i64 %idxprom332.i
+ store float %12, ptr %arrayidx333.i, align 4
+ tail call fastcc void @match_calc(ptr %call104.i, ptr poison, ptr poison, i32 signext %sub111, i32 signext %conv5, ptr poison, ptr poison, i32 signext 1)
+ br label %for.body429.i
+
+for.body429.i: ; preds = %for.body429.i, %for.end321.i
+ %j.743.i = phi i32 [ %sub111, %for.end321.i ], [ %sub436.i, %for.body429.i ]
+ %sub436.i = add nsw i32 %j.743.i, -1
+ %idxprom437.i = zext i32 %sub436.i to i64
+ %arrayidx438.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom437.i
+ store float 0.000000e+00, ptr %arrayidx438.i, align 4
+ store i32 %sub110, ptr poison, align 4
+ br label %for.body429.i
+}
+
+declare i64 @strlen(ptr)
+declare ptr @AllocateFloatVec(i32)
+declare void @st_OpeningGapCount(ptr, i32, ptr, ptr, i32)
+declare ptr @strncpy(ptr, ptr, i64)
+declare ptr @AllocateIntVec(i32)
+declare void @match_calc(ptr, ptr, ptr, i32, i32, ptr, ptr, i32)
More information about the llvm-commits
mailing list