[llvm] 0e4378c - [LoongArch] Add emergency spill slot for CFR spill/reload

Fri Nov 18 22:53:35 PST 2022

Author: wanglei
Date: 2022-11-19T14:35:31+08:00
New Revision: 0e4378c55e318869ea6404f2351f143e6d17fbfe

URL: https://github.com/llvm/llvm-project/commit/0e4378c55e318869ea6404f2351f143e6d17fbfe
DIFF: https://github.com/llvm/llvm-project/commit/0e4378c55e318869ea6404f2351f143e6d17fbfe.diff

LOG: [LoongArch] Add emergency spill slot for CFR spill/reload

When all registers have been allocated and a CFR needs to be saved on the
stack, an emergency spill slot is required, because spilling and reloading
a CFR requires a general-purpose register to transfer the value.

The attached test case was bugpoint-reduced down from
`MultiSource/Benchmarks/mafft/Lalignmm.c` in the test-suite.
Without this patch, llc crashes and reports the following error:

```
LLVM ERROR: Error while trying to spill R4 from class GPR: Cannot scavenge register without an emergency spill slot!
```

Reviewed By: SixWeining

Differential Revision: https://reviews.llvm.org/D138007

Added: 
    llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll

Modified: 
    llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index a5c84380e77f4..61c2b76879e39 100644

--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -14,6 +14,7 @@
 #include "LoongArchMachineFunctionInfo.h"
 #include "LoongArchSubtarget.h"
 #include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,6 +128,16 @@ static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII,
   return FuncSize;
 }
 
+static bool needScavSlotForCFR(MachineFunction &MF) {
+  if (!MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
+    return false;
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      if (MI.getOpcode() == LoongArch::PseudoST_CFR)
+        return true;
+  return false;
+}
+
 void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
   const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
@@ -136,26 +147,33 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
       MF.getInfo<LoongArchMachineFunctionInfo>();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
+  unsigned ScavSlotsNum = 0;
+
   // Far branches beyond 27-bit offset require a spill slot for scratch register.
-  if (!isInt<27>(estimateFunctionSizeInBytes(TII, MF))) {
+  bool IsLargeFunction = !isInt<27>(estimateFunctionSizeInBytes(TII, MF));
+  if (IsLargeFunction)
+    ScavSlotsNum = 1;
+
+  // estimateStackSize has been observed to under-estimate the final stack
+  // size, so give ourselves wiggle-room by checking for stack size
+  // representable an 11-bit signed field rather than 12-bits.
+  if (!isInt<11>(MFI.estimateStackSize(MF)))
+    ScavSlotsNum = std::max(ScavSlotsNum, 1u);
+
+  // For CFR spill.
+  if (needScavSlotForCFR(MF))
+    ++ScavSlotsNum;
+
+  // Create emergency spill slots.
+  for (unsigned i = 0; i < ScavSlotsNum; ++i) {
     int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC),
                                    false);
     RS->addScavengingFrameIndex(FI);
-    if (LAFI->getBranchRelaxationSpillFrameIndex() == -1)
+    if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1)
       LAFI->setBranchRelaxationSpillFrameIndex(FI);
+    LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
+                      << ") as the emergency spill slot.\n");
   }
-  // estimateStackSize has been observed to under-estimate the final stack
-  // size, so give ourselves wiggle-room by checking for stack size
-  // representable an 11-bit signed field rather than 12-bits.
-  if (isInt<11>(MFI.estimateStackSize(MF)))
-    return;
-
-  // Create an emergency spill slot.
-  int FI =
-      MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false);
-  RS->addScavengingFrameIndex(FI);
-  LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
-                    << ") as the emergency spill slot.\n");
 }
 
 void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,

diff  --git a/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll
new file mode 100644
index 0000000000000..4a9189c97e417
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll
@@ -0,0 +1,127 @@
+; RUN: llc --mtriple=loongarch64 -mattr=+d %s -o /dev/null
+
+; ModuleID = 'bugpoint-reduced-simplifycfg.bc'
+source_filename = "test-suite-src/MultiSource/Benchmarks/mafft/Lalignmm.c"
+
+define float @Lalignmm_hmout(ptr %seq1, ptr %eff1, i32 %icyc) {
+entry:
+  %call4 = tail call i64 @strlen(ptr dereferenceable(1) poison)
+  %conv5 = trunc i64 %call4 to i32
+  %call7 = tail call i64 @strlen(ptr dereferenceable(1) poison)
+  %call20 = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call22 = tail call ptr @AllocateFloatVec(i32 signext poison)
+  tail call void @st_OpeningGapCount(ptr poison, i32 signext %icyc, ptr %seq1, ptr %eff1, i32 signext %conv5)
+  %sub110 = add nsw i32 %conv5, -1
+  %sub111 = add nsw i32 0, -1
+  br i1 poison, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i:                             ; preds = %entry
+  %sext294 = shl i64 %call4, 32
+  %conv23.i = ashr exact i64 %sext294, 32
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.cond.preheader.i
+  %call.i = tail call ptr @strncpy(ptr poison, ptr poison, i64 %conv23.i)
+  br label %for.body.i
+
+if.end.i:                                         ; preds = %entry
+  %call82.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call84.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call86.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call88.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call90.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call92.i = tail call ptr @AllocateIntVec(i32 signext poison)
+  %call94.i = tail call ptr @AllocateIntVec(i32 signext poison)
+  %call104.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call108.i = tail call ptr @AllocateFloatVec(i32 signext poison)
+  %call110.i = tail call ptr @AllocateIntVec(i32 signext poison)
+  %idxprom220.i = sext i32 %sub111 to i64
+  %mpjpt.018.i = getelementptr inbounds i32, ptr %call110.i, i64 1
+  %arrayidx329.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom220.i
+  %idxprom332.i = and i64 %call7, 4294967295
+  %wide.trip.count130.i = zext i32 poison to i64
+  %0 = add nsw i64 1, -1
+  %arrayidx239.i = getelementptr inbounds float, ptr %call104.i, i64 1
+  %1 = load float, ptr %arrayidx239.i, align 4
+  store float %1, ptr %call84.i, align 4
+  %curpt.017.i = getelementptr inbounds float, ptr %call84.i, i64 1
+  %arrayidx279.i = getelementptr inbounds float, ptr %call20, i64 %0
+  %2 = load ptr, ptr poison, align 8
+  %3 = load ptr, ptr null, align 8
+  %4 = trunc i64 %0 to i32
+  br label %for.body260.us.i
+
+for.body260.us.i:                                 ; preds = %if.end292.us.i, %if.end.i
+  %indvars.iv132.i = phi i64 [ %indvars.iv.next133.i, %if.end292.us.i ], [ 1, %if.end.i ]
+  %mpjpt.026.us.i = phi ptr [ poison, %if.end292.us.i ], [ %mpjpt.018.i, %if.end.i ]
+  %curpt.025.us.i = phi ptr [ %curpt.0.us.i, %if.end292.us.i ], [ %curpt.017.i, %if.end.i ]
+  %prept.022.us.i = phi ptr [ %incdec.ptr316.us.i, %if.end292.us.i ], [ %call82.i, %if.end.i ]
+  %mi.021.us.i = phi float [ %mi.1.us.i, %if.end292.us.i ], [ poison, %if.end.i ]
+  %5 = load float, ptr %prept.022.us.i, align 4
+  %6 = add nsw i64 %indvars.iv132.i, -1
+  %arrayidx263.us.i = getelementptr inbounds float, ptr %call22, i64 %6
+  %7 = load float, ptr %arrayidx263.us.i, align 4
+  %add264.us.i = fadd float %mi.021.us.i, %7
+  %cmp265.us.i = fcmp ogt float %add264.us.i, %5
+  %wm.0.us.i = select i1 %cmp265.us.i, float %add264.us.i, float %5
+  %arrayidx270.us.i = getelementptr inbounds float, ptr poison, i64 %indvars.iv132.i
+  %cmp272.us.i = fcmp ult float 0.000000e+00, %mi.021.us.i
+  %mi.1.us.i = select i1 %cmp272.us.i, float %mi.021.us.i, float 0.000000e+00
+  %8 = trunc i64 %6 to i32
+  %mpi.1.us.i = select i1 %cmp272.us.i, i32 0, i32 %8
+  %9 = load float, ptr %arrayidx279.i, align 4
+  %add280.us.i = fadd float 0.000000e+00, %9
+  %cmp281.us.i = fcmp ogt float %add280.us.i, %wm.0.us.i
+  %wm.1.us.i = select i1 %cmp281.us.i, float %add280.us.i, float %wm.0.us.i
+  %cmp288.us.i = fcmp ult float poison, 0.000000e+00
+  br i1 %cmp288.us.i, label %if.end292.us.i, label %if.then290.us.i
+
+if.then290.us.i:                                  ; preds = %for.body260.us.i
+  store i32 %4, ptr %mpjpt.026.us.i, align 4
+  br label %if.end292.us.i
+
+if.end292.us.i:                                   ; preds = %if.then290.us.i, %for.body260.us.i
+  %10 = phi i32 [ %4, %if.then290.us.i ], [ poison, %for.body260.us.i ]
+  %add293.us.i = fadd float %wm.1.us.i, 0.000000e+00
+  %arrayidx297.us.i = getelementptr inbounds float, ptr %2, i64 %indvars.iv132.i
+  store float %add293.us.i, ptr %arrayidx297.us.i, align 4
+  %arrayidx306.us.i = getelementptr inbounds i32, ptr %call94.i, i64 %indvars.iv132.i
+  store i32 %10, ptr %arrayidx306.us.i, align 4
+  %arrayidx308.us.i = getelementptr inbounds i32, ptr %call92.i, i64 %indvars.iv132.i
+  store i32 %mpi.1.us.i, ptr %arrayidx308.us.i, align 4
+  %11 = load float, ptr %curpt.025.us.i, align 4
+  %arrayidx310.us.i = getelementptr inbounds float, ptr %call86.i, i64 %indvars.iv132.i
+  store float %11, ptr %arrayidx310.us.i, align 4
+  %arrayidx312.us.i = getelementptr inbounds float, ptr %call90.i, i64 %indvars.iv132.i
+  store float 0.000000e+00, ptr %arrayidx312.us.i, align 4
+  %arrayidx314.us.i = getelementptr inbounds float, ptr %call88.i, i64 %indvars.iv132.i
+  store float %mi.1.us.i, ptr %arrayidx314.us.i, align 4
+  %incdec.ptr316.us.i = getelementptr inbounds float, ptr %prept.022.us.i, i64 1
+  %indvars.iv.next133.i = add nuw nsw i64 %indvars.iv132.i, 1
+  %curpt.0.us.i = getelementptr inbounds float, ptr %curpt.025.us.i, i64 1
+  %exitcond137.not.i = icmp eq i64 %indvars.iv.next133.i, %wide.trip.count130.i
+  br i1 %exitcond137.not.i, label %for.end321.i, label %for.body260.us.i
+
+for.end321.i:                                     ; preds = %if.end292.us.i
+  %12 = load float, ptr %arrayidx329.i, align 4
+  %arrayidx333.i = getelementptr inbounds float, ptr %3, i64 %idxprom332.i
+  store float %12, ptr %arrayidx333.i, align 4
+  tail call fastcc void @match_calc(ptr %call104.i, ptr poison, ptr poison, i32 signext %sub111, i32 signext %conv5, ptr poison, ptr poison, i32 signext 1)
+  br label %for.body429.i
+
+for.body429.i:                                    ; preds = %for.body429.i, %for.end321.i
+  %j.743.i = phi i32 [ %sub111, %for.end321.i ], [ %sub436.i, %for.body429.i ]
+  %sub436.i = add nsw i32 %j.743.i, -1
+  %idxprom437.i = zext i32 %sub436.i to i64
+  %arrayidx438.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom437.i
+  store float 0.000000e+00, ptr %arrayidx438.i, align 4
+  store i32 %sub110, ptr poison, align 4
+  br label %for.body429.i
+}
+
+declare i64 @strlen(ptr)
+declare ptr @AllocateFloatVec(i32)
+declare void @st_OpeningGapCount(ptr, i32, ptr, ptr, i32)
+declare ptr @strncpy(ptr, ptr, i64)
+declare ptr @AllocateIntVec(i32)
+declare void @match_calc(ptr, ptr, ptr, i32, i32, ptr, ptr, i32)