[llvm] 041a786 - [AArch64] Fix pairing different types of registers when computing CSRs. (#66642)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 23:34:08 PDT 2023


Author: Zhaoxuan Jiang
Date: 2023-10-16T23:34:04-07:00
New Revision: 041a786c78fbcee3537ca636bf796bb18fb6f313

URL: https://github.com/llvm/llvm-project/commit/041a786c78fbcee3537ca636bf796bb18fb6f313
DIFF: https://github.com/llvm/llvm-project/commit/041a786c78fbcee3537ca636bf796bb18fb6f313.diff

LOG: [AArch64] Fix pairing different types of registers when computing CSRs. (#66642)

If a function has an odd number of one type of register to save, and the
calling convention also requires an odd number of CSRs of that type, an FP
register could accidentally be marked as saved when producePairRegisters
returns true.
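
To illustrate the pairing problem, here is a standalone sketch (not LLVM
code; the CSR names and ordering below are simplified assumptions) of how the
XOR-based neighbor pairing used by determineCalleeSaves crosses from the GPR
run into the FPR run once the GPR run has odd length:

    #include <cstdio>

    int main() {
      // Hypothetical CSR order: nine GPRs followed by FPRs (x21 omitted, as
      // for a swifterror function); names and ordering are illustrative only.
      const char *CSRegs[] = {"X19", "X20", "X22", "X23", "X24", "X25",
                              "X26", "X27", "X28", "D8",  "D9",  "D10"};
      for (unsigned I = 0; I < 12; ++I)
        // Pre-patch pairing rule: each CSR pairs with its XOR-1 neighbor.
        std::printf("%-3s pairs with %s\n", CSRegs[I], CSRegs[I ^ 1]);
      // With an odd GPR count, X28 (index 8) is "paired" with D8 (index 9):
      // the cross-class pair the patch now resets to NoRegister instead.
      return 0;
    }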

This patch also fixes the AArch64LowerHomogeneousPrologEpilog pass, which
did not handle AArch64::NoRegister; in fact, this pass had to be fixed along
with the register pairing so that a test could be written for it.
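
As a rough sketch of the NoRegister handling (standalone C++, not the pass
itself; the register list simply mirrors the new test's CHECK lines), an
unpaired slot shows up as AArch64::NoRegister and is skipped when the
outlined helper's name is assembled:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // An empty string stands in for AArch64::NoRegister; x28 is the odd,
      // unpaired GPR, mirroring the test added below.
      std::vector<std::string> Regs = {"x30", "x29", "x19", "x20", "x22",
                                       "x23", "x24", "x25", "x26", "x27",
                                       "x28", ""};
      std::string Name = "OUTLINED_FUNCTION_PROLOG_";
      for (const std::string &Reg : Regs)
        if (!Reg.empty()) // skip the NoRegister slot, as the pass now does
          Name += Reg;
      // Prints OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28
      std::cout << Name << '\n';
      return 0;
    }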

Added: 
    llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index e68d67c6e78de2c..880de7d0306a7e1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -330,6 +330,23 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
   if (AFI->hasSwiftAsyncContext())
     return false;
 
+  // If there are an odd number of GPRs before LR and FP in the CSRs list,
+  // they will not be paired into one RegPairInfo, which is incompatible with
+  // the assumption made by the homogeneous prolog epilog pass.
+  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+  unsigned NumGPRs = 0;
+  for (unsigned I = 0; CSRegs[I]; ++I) {
+    Register Reg = CSRegs[I];
+    if (Reg == AArch64::LR) {
+      assert(CSRegs[I + 1] == AArch64::FP);
+      if (NumGPRs % 2 != 0)
+        return false;
+      break;
+    }
+    if (AArch64::GPR64RegClass.contains(Reg))
+      ++NumGPRs;
+  }
+
   return true;
 }
 
@@ -2750,7 +2767,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
       // Update register live in.
       if (!MRI.isReserved(RPI.Reg1))
         MBB.addLiveIn(RPI.Reg1);
-      if (!MRI.isReserved(RPI.Reg2))
+      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
         MBB.addLiveIn(RPI.Reg2);
     }
     return true;
@@ -3000,6 +3017,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                 : (unsigned)AArch64::NoRegister;
 
   unsigned ExtraCSSpill = 0;
+  bool HasUnpairedGPR64 = false;
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
@@ -3010,10 +3028,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
     bool RegUsed = SavedRegs.test(Reg);
     unsigned PairedReg = AArch64::NoRegister;
-    if (AArch64::GPR64RegClass.contains(Reg) ||
-        AArch64::FPR64RegClass.contains(Reg) ||
-        AArch64::FPR128RegClass.contains(Reg))
-      PairedReg = CSRegs[i ^ 1];
+    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
+    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
+        AArch64::FPR128RegClass.contains(Reg)) {
+      // Compensate for odd numbers of GP CSRs.
+      // For now, all the known cases of odd number of CSRs are of GPRs.
+      if (HasUnpairedGPR64)
+        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
+      else
+        PairedReg = CSRegs[i ^ 1];
+    }
+
+    // If the function requires all the GP registers to save (SavedRegs),
+    // and there are an odd number of GP CSRs at the same time (CSRegs),
+    // PairedReg could be in a different register class from Reg, which would
+    // lead to a FPR (usually D8) accidentally being marked saved.
+    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
+      PairedReg = AArch64::NoRegister;
+      HasUnpairedGPR64 = true;
+    }
+    assert(PairedReg == AArch64::NoRegister ||
+           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR128RegClass.contains(Reg, PairedReg));
 
     if (!RegUsed) {
       if (AArch64::GPR64RegClass.contains(Reg) &&
@@ -3112,12 +3149,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                         << " to get a scratch register.\n");
       SavedRegs.set(UnspilledCSGPR);
+      ExtraCSSpill = UnspilledCSGPR;
+
       // MachO's compact unwind format relies on all registers being stored in
       // pairs, so if we need to spill one extra for BigStack, then we need to
       // store the pair.
-      if (producePairRegisters(MF))
-        SavedRegs.set(UnspilledCSGPRPaired);
-      ExtraCSSpill = UnspilledCSGPR;
+      if (producePairRegisters(MF)) {
+        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
+          // Failed to make a pair for compact unwind format, revert spilling.
+          if (produceCompactUnwindFrame(MF)) {
+            SavedRegs.reset(UnspilledCSGPR);
+            ExtraCSSpill = AArch64::NoRegister;
+          }
+        } else
+          SavedRegs.set(UnspilledCSGPRPaired);
+      }
     }
 
     // If we didn't find an extra callee-saved register to spill, create

diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index d054fe509be0b6b..4ebe1c9e0e66033 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -146,8 +146,11 @@ static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
     break;
   }
 
-  for (auto Reg : Regs)
+  for (auto Reg : Regs) {
+    if (Reg == AArch64::NoRegister)
+      continue;
     RegStream << AArch64InstPrinter::getRegisterName(Reg);
+  }
 
   return RegStream.str();
 }
@@ -195,46 +198,84 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M,
 }
 
 /// Emit a store-pair instruction for frame-setup.
+/// If Reg2 is AArch64::NoRegister, emit STR instead.
 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                       int Offset, bool IsPreDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  const bool IsPaired = Reg2 != AArch64::NoRegister;
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
-  if (IsPreDec)
-    Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
-  else
-    Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
+  if (IsPreDec) {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
+    else
+      Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
+  } else {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
+    else
+      Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
+  }
+  // The implicit scale for Offset is 8.
+  TypeSize Scale(0U, false);
+  unsigned Width;
+  int64_t MinOffset, MaxOffset;
+  bool Success =
+      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+  assert(Success && "Invalid Opcode");
+  Offset *= (8 / (int)Scale);
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPreDec)
     MIB.addDef(AArch64::SP);
-  MIB.addReg(Reg2)
-      .addReg(Reg1)
+  if (IsPaired)
+    MIB.addReg(Reg2);
+  MIB.addReg(Reg1)
       .addReg(AArch64::SP)
       .addImm(Offset)
       .setMIFlag(MachineInstr::FrameSetup);
 }
 
 /// Emit a load-pair instruction for frame-destroy.
+/// If Reg2 is AArch64::NoRegister, emit LDR instead.
 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Pos,
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                      int Offset, bool IsPostDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  const bool IsPaired = Reg2 != AArch64::NoRegister;
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
-  if (IsPostDec)
-    Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
-  else
-    Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
+  if (IsPostDec) {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
+    else
+      Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
+  } else {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
+    else
+      Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
+  }
+  // The implicit scale for Offset is 8.
+  TypeSize Scale(0U, false);
+  unsigned Width;
+  int64_t MinOffset, MaxOffset;
+  bool Success =
+      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+  assert(Success && "Invalid Opcode");
+  Offset *= (8 / (int)Scale);
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPostDec)
     MIB.addDef(AArch64::SP);
-  MIB.addReg(Reg2, getDefRegState(true))
-      .addReg(Reg1, getDefRegState(true))
+  if (IsPaired)
+    MIB.addReg(Reg2, getDefRegState(true));
+  MIB.addReg(Reg1, getDefRegState(true))
       .addReg(AArch64::SP)
       .addImm(Offset)
       .setMIFlag(MachineInstr::FrameDestroy);
@@ -433,9 +474,18 @@ bool AArch64LowerHomogeneousPE::lowerEpilog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   for (auto &MO : MI.operands())
-    if (MO.isReg())
+    if (MO.isReg()) {
+      if (!MO.getReg().isValid()) {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
+    }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;
@@ -507,17 +557,26 @@ bool AArch64LowerHomogeneousPE::lowerProlog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   int LRIdx = 0;
   std::optional<int> FpOffset;
   for (auto &MO : MI.operands()) {
     if (MO.isReg()) {
-      if (MO.getReg() == AArch64::LR)
-        LRIdx = Regs.size();
+      if (MO.getReg().isValid()) {
+        if (MO.getReg() == AArch64::LR)
+          LRIdx = Regs.size();
+      } else {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
     } else if (MO.isImm()) {
       FpOffset = MO.getImm();
     }
   }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;

diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
new file mode 100644
index 000000000000000..3b90163e6d29531
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu  -homogeneous-prolog-epilog | FileCheck %s --check-prefixes=CHECK-LINUX
+
+declare void @bar(i32 %i)
+
+define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
+define void @odd_num_callee_saved_registers_with_fpr(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{d8},~{d9}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	str	x28, [sp, #-80]!
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	ldr	x28, [sp], #96
+
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28d8d9:
+; CHECK:	stp	d9, d8, [sp, #-96]!
+; CHECK:	str	x28, [sp, #16]
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28d8d9
+; CHECK:	ldr	x28, [sp, #16]
+; CHECK:	ldp	d9, d8, [sp], #112
+
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG

More information about the llvm-commits mailing list