[llvm] [AArch64] Fix pairing different types of registers when computing CSRs. (PR #66642)

Zhaoxuan Jiang via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 06:08:11 PDT 2023


https://github.com/nocchijiang created https://github.com/llvm/llvm-project/pull/66642

If a function has odd number of same type of registers to save, and the calling convention also requires odd number of such type of CSRs, an FP register would be accidentally marked as saved when producePairRegisters returns true.

This patch also fixes the AArch64LowerHomogeneousPrologEpilog pass not handling AArch64::NoRegister; actually this pass must be fixed along with the register pairing so i can write a test for it.

>From 19cc004d0027fd307552eeae71c7b5cf35de6000 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Sat, 16 Sep 2023 19:56:42 +0800
Subject: [PATCH] [AArch64] Fix pairing different types of registers when
 computing CSRs.

If a function has odd number of same type of registers to save, and
the calling convention also requires odd number of such type of CSRs,
an FP register would be accidentally marked as saved when
producePairRegisters returns true.

This patch also fixes the AArch64LowerHomogeneousPrologEpilog pass not
handling AArch64::NoRegister; actually this pass must be fixed along
with the register pairing so i can write a test for it.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 45 +++++++++++++++----
 .../AArch64LowerHomogeneousPrologEpilog.cpp   | 45 +++++++++++++++++--
 .../arm64-homogeneous-prolog-epilog.ll        | 18 ++++++++
 3 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 68e68449d4073b2..d35a9fbda628859 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2815,7 +2815,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
       // Update register live in.
       if (!MRI.isReserved(RPI.Reg1))
         MBB.addLiveIn(RPI.Reg1);
-      if (!MRI.isReserved(RPI.Reg2))
+      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
         MBB.addLiveIn(RPI.Reg2);
     }
     return true;
@@ -3065,6 +3065,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                 : (unsigned)AArch64::NoRegister;
 
   unsigned ExtraCSSpill = 0;
+  bool HasUnpairedGPR64 = false;
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
@@ -3075,10 +3076,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
     bool RegUsed = SavedRegs.test(Reg);
     unsigned PairedReg = AArch64::NoRegister;
-    if (AArch64::GPR64RegClass.contains(Reg) ||
-        AArch64::FPR64RegClass.contains(Reg) ||
-        AArch64::FPR128RegClass.contains(Reg))
-      PairedReg = CSRegs[i ^ 1];
+    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
+    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
+        AArch64::FPR128RegClass.contains(Reg)) {
+      // Compensate for odd numbers of GP CSRs.
+      if (HasUnpairedGPR64)
+        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
+      else
+        PairedReg = CSRegs[i ^ 1];
+    }
+
+    // If the function requires odd number of GP registers to save (SavedRegs),
+    // and there are odd number of GP CSRs at the same time (CSRegs),
+    // PairedReg could be in a different register class from Reg, which would
+    // lead to an FP register accidentally being marked saved.
+    // For now, all the known cases of odd number of CSRs are of GP registers.
+    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
+      PairedReg = AArch64::NoRegister;
+      HasUnpairedGPR64 = true;
+    }
+    assert(PairedReg == AArch64::NoRegister ||
+           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR128RegClass.contains(Reg, PairedReg));
 
     if (!RegUsed) {
       if (AArch64::GPR64RegClass.contains(Reg) &&
@@ -3177,12 +3197,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                         << " to get a scratch register.\n");
       SavedRegs.set(UnspilledCSGPR);
+      ExtraCSSpill = UnspilledCSGPR;
+
       // MachO's compact unwind format relies on all registers being stored in
       // pairs, so if we need to spill one extra for BigStack, then we need to
       // store the pair.
-      if (producePairRegisters(MF))
-        SavedRegs.set(UnspilledCSGPRPaired);
-      ExtraCSSpill = UnspilledCSGPR;
+      if (producePairRegisters(MF)) {
+        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
+          // Failed to make a pair for compact unwind format, revert spilling.
+          if (produceCompactUnwindFrame(MF)) {
+            SavedRegs.reset(UnspilledCSGPR);
+            ExtraCSSpill = AArch64::NoRegister;
+          }
+        } else
+          SavedRegs.set(UnspilledCSGPRPaired);
+      }
     }
 
     // If we didn't find an extra callee-saved register to spill, create
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index d054fe509be0b6b..d396f21c3853348 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -146,8 +146,11 @@ static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
     break;
   }
 
-  for (auto Reg : Regs)
+  for (auto Reg : Regs) {
+    if (Reg == AArch64::NoRegister)
+      continue;
     RegStream << AArch64InstPrinter::getRegisterName(Reg);
+  }
 
   return RegStream.str();
 }
@@ -195,10 +198,18 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M,
 }
 
 /// Emit a store-pair instruction for frame-setup.
+/// If Reg2 is AArch64::NoRegister, emit a gap to respect the alignment
+/// for unpaired registers.
 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                       int Offset, bool IsPreDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  if (Reg2 == AArch64::NoRegister) {
+    assert(AArch64::GPR64RegClass.contains(Reg1));
+    Reg2 = Reg1;
+    Reg1 = AArch64::XZR;
+  }
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
@@ -218,10 +229,18 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
 }
 
 /// Emit a load-pair instruction for frame-destroy.
+/// If Reg2 is AArch64::NoRegister, respect the alignment for unpaired
+/// registers by reading the gap into XZR.
 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Pos,
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                      int Offset, bool IsPostDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  if (Reg2 == AArch64::NoRegister) {
+    assert(AArch64::GPR64RegClass.contains(Reg1));
+    Reg2 = Reg1;
+    Reg1 = AArch64::XZR;
+  }
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
@@ -433,9 +452,18 @@ bool AArch64LowerHomogeneousPE::lowerEpilog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   for (auto &MO : MI.operands())
-    if (MO.isReg())
+    if (MO.isReg()) {
+      if (!MO.getReg().isValid()) {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
+    }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;
@@ -507,17 +535,26 @@ bool AArch64LowerHomogeneousPE::lowerProlog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   int LRIdx = 0;
   std::optional<int> FpOffset;
   for (auto &MO : MI.operands()) {
     if (MO.isReg()) {
-      if (MO.getReg() == AArch64::LR)
-        LRIdx = Regs.size();
+      if (MO.getReg().isValid()) {
+        if (MO.getReg() == AArch64::LR)
+          LRIdx = Regs.size();
+      } else {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
     } else if (MO.isImm()) {
       FpOffset = MO.getImm();
     }
   }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
index 9e32be58273eec4..bc2223f69c229d8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
@@ -72,3 +72,21 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 ; CHECK-LINUX-NEXT: ldp     x22, x21, [sp, #16]
 ; CHECK-LINUX-NEXT: ldp     x29, x30, [sp], #48
 ; CHECK-LINUX-NEXT: ret     x16
+
+declare void @bar(i32 %i)
+
+define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	stp	x28, xzr, [sp, #-80]!
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	ldp	x28, xzr, [sp], #96
+
+; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_PROLOG_x19x20x22x23x24x25x26x27x28x30x29:
+; CHECK-LINUX:	stp	x29, xzr, [sp, #-8]!
+; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_TAIL_x19x20x22x23x24x25x26x27x28x30x29:
+; CHECK-LINUX:	ldp	x29, xzr, [sp], #96



More information about the llvm-commits mailing list