[llvm] [AArch64] Fix pairing different types of registers when computing CSRs. (PR #66642)

Zhaoxuan Jiang via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 15 18:46:34 PDT 2023


https://github.com/nocchijiang updated https://github.com/llvm/llvm-project/pull/66642

>From 44d4402a12094433723e3348aeb03fa7de9f8c4d Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Sat, 16 Sep 2023 19:56:42 +0800
Subject: [PATCH 1/7] [AArch64] Fix pairing different types of registers when
 computing CSRs.

If a function has odd number of same type of registers to save, and
the calling convention also requires odd number of such type of CSRs,
an FP register would be accidentally marked as saved when
producePairRegisters returns true.

This patch also fixes the AArch64LowerHomogeneousPrologEpilog pass not
handling AArch64::NoRegister; actually this pass must be fixed along
with the register pairing so i can write a test for it.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 45 +++++++++++++++----
 .../AArch64LowerHomogeneousPrologEpilog.cpp   | 45 +++++++++++++++++--
 .../arm64-homogeneous-prolog-epilog.ll        | 18 ++++++++
 3 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 78f85faaf69bf96..db48b88c38623e4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2768,7 +2768,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
       // Update register live in.
       if (!MRI.isReserved(RPI.Reg1))
         MBB.addLiveIn(RPI.Reg1);
-      if (!MRI.isReserved(RPI.Reg2))
+      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
         MBB.addLiveIn(RPI.Reg2);
     }
     return true;
@@ -3018,6 +3018,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                 : (unsigned)AArch64::NoRegister;
 
   unsigned ExtraCSSpill = 0;
+  bool HasUnpairedGPR64 = false;
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
@@ -3028,10 +3029,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
     bool RegUsed = SavedRegs.test(Reg);
     unsigned PairedReg = AArch64::NoRegister;
-    if (AArch64::GPR64RegClass.contains(Reg) ||
-        AArch64::FPR64RegClass.contains(Reg) ||
-        AArch64::FPR128RegClass.contains(Reg))
-      PairedReg = CSRegs[i ^ 1];
+    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
+    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
+        AArch64::FPR128RegClass.contains(Reg)) {
+      // Compensate for odd numbers of GP CSRs.
+      // For now, all the known cases of odd number of CSRs are of GPRs.
+      if (HasUnpairedGPR64)
+        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
+      else
+        PairedReg = CSRegs[i ^ 1];
+    }
+
+    // If the function requires all the GP registers to save (SavedRegs),
+    // and there are an odd number of GP CSRs at the same time (CSRegs),
+    // PairedReg could be in a different register class from Reg, which would
+    // lead to an FPR (usually D8) accidentally being marked saved.
+    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
+      PairedReg = AArch64::NoRegister;
+      HasUnpairedGPR64 = true;
+    }
+    assert(PairedReg == AArch64::NoRegister ||
+           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
+           AArch64::FPR128RegClass.contains(Reg, PairedReg));
 
     if (!RegUsed) {
       if (AArch64::GPR64RegClass.contains(Reg) &&
@@ -3130,12 +3150,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                         << " to get a scratch register.\n");
       SavedRegs.set(UnspilledCSGPR);
+      ExtraCSSpill = UnspilledCSGPR;
+
       // MachO's compact unwind format relies on all registers being stored in
       // pairs, so if we need to spill one extra for BigStack, then we need to
       // store the pair.
-      if (producePairRegisters(MF))
-        SavedRegs.set(UnspilledCSGPRPaired);
-      ExtraCSSpill = UnspilledCSGPR;
+      if (producePairRegisters(MF)) {
+        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
+          // Failed to make a pair for compact unwind format, revert spilling.
+          if (produceCompactUnwindFrame(MF)) {
+            SavedRegs.reset(UnspilledCSGPR);
+            ExtraCSSpill = AArch64::NoRegister;
+          }
+        } else
+          SavedRegs.set(UnspilledCSGPRPaired);
+      }
     }
 
     // If we didn't find an extra callee-saved register to spill, create
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index d054fe509be0b6b..d396f21c3853348 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -146,8 +146,11 @@ static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
     break;
   }
 
-  for (auto Reg : Regs)
+  for (auto Reg : Regs) {
+    if (Reg == AArch64::NoRegister)
+      continue;
     RegStream << AArch64InstPrinter::getRegisterName(Reg);
+  }
 
   return RegStream.str();
 }
@@ -195,10 +198,18 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M,
 }
 
 /// Emit a store-pair instruction for frame-setup.
+/// If Reg2 is AArch64::NoRegister, emit a gap to respect the alignment
+/// for unpaired registers.
 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                       int Offset, bool IsPreDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  if (Reg2 == AArch64::NoRegister) {
+    assert(AArch64::GPR64RegClass.contains(Reg1));
+    Reg2 = Reg1;
+    Reg1 = AArch64::XZR;
+  }
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
@@ -218,10 +229,18 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
 }
 
 /// Emit a load-pair instruction for frame-destroy.
+/// If Reg2 is AArch64::NoRegister, respect the alignment for unpaired
+/// registers by reading the gap into XZR.
 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Pos,
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                      int Offset, bool IsPostDec) {
+  assert(Reg1 != AArch64::NoRegister);
+  if (Reg2 == AArch64::NoRegister) {
+    assert(AArch64::GPR64RegClass.contains(Reg1));
+    Reg2 = Reg1;
+    Reg1 = AArch64::XZR;
+  }
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
@@ -433,9 +452,18 @@ bool AArch64LowerHomogeneousPE::lowerEpilog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   for (auto &MO : MI.operands())
-    if (MO.isReg())
+    if (MO.isReg()) {
+      if (!MO.getReg().isValid()) {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
+    }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;
@@ -507,17 +535,26 @@ bool AArch64LowerHomogeneousPE::lowerProlog(
 
   DebugLoc DL = MI.getDebugLoc();
   SmallVector<unsigned, 8> Regs;
+  bool HasUnpairedReg = false;
   int LRIdx = 0;
   std::optional<int> FpOffset;
   for (auto &MO : MI.operands()) {
     if (MO.isReg()) {
-      if (MO.getReg() == AArch64::LR)
-        LRIdx = Regs.size();
+      if (MO.getReg().isValid()) {
+        if (MO.getReg() == AArch64::LR)
+          LRIdx = Regs.size();
+      } else {
+        // For now we are only expecting unpaired GP registers which should
+        // occur exactly once.
+        assert(!HasUnpairedReg);
+        HasUnpairedReg = true;
+      }
       Regs.push_back(MO.getReg());
     } else if (MO.isImm()) {
       FpOffset = MO.getImm();
     }
   }
+  (void)HasUnpairedReg;
   int Size = (int)Regs.size();
   if (Size == 0)
     return false;
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
index c414c7c2464f8be..e39375ac1e49e46 100644
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
@@ -77,3 +77,21 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 define void @swift_async(i8* swiftasync %ctx) minsize "frame-pointer"="all" {
   ret void
 }
+
+declare void @bar(i32 %i)
+
+define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	stp	x28, xzr, [sp, #-80]!
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	ldp	x28, xzr, [sp], #96
+
+; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_PROLOG_x19x20x22x23x24x25x26x27x28x30x29:
+; CHECK-LINUX:	stp	x29, xzr, [sp, #-8]!
+; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_TAIL_x19x20x22x23x24x25x26x27x28x30x29:
+; CHECK-LINUX:	ldp	x29, xzr, [sp], #96

>From deb2d70b9ede5febf2fe1e24102a11d0b0b26d3d Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Sun, 8 Oct 2023 16:24:35 +0800
Subject: [PATCH 2/7] bail out for unpaired LR/FP

---
 .../lib/Target/AArch64/AArch64FrameLowering.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index db48b88c38623e4..bbee2b0c8a7bff1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -335,6 +335,23 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
   if (AFI->hasSwiftAsyncContext())
     return false;
 
+  // If there are an odd number of GPRs before LR and FP in the CSRs list,
+  // they will not be paired into one RegPairInfo, which is incompatible with
+  // the assumption made by the homogeneous prolog epilog pass.
+  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+  unsigned NumGPRs = 0;
+  for (unsigned I = 0; CSRegs[I]; ++I) {
+    Register Reg = CSRegs[I];
+    if (Reg == AArch64::LR) {
+      assert(CSRegs[I + 1] == AArch64::FP);
+      if (NumGPRs % 2 != 0)
+        return false;
+      break;
+    }
+    if (AArch64::GPR64RegClass.contains(Reg))
+      ++NumGPRs;
+  }
+
   return true;
 }
 

>From 65fdef1458a18c1471e7444c1a844aeba89c1f87 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Mon, 9 Oct 2023 20:35:31 +0800
Subject: [PATCH 3/7] emit str/ldr for unpaired cases

---
 .../AArch64LowerHomogeneousPrologEpilog.cpp   | 64 +++++++++++--------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index d396f21c3853348..231c13ff3b47570 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -198,62 +198,74 @@ static MachineFunction &createFrameHelperMachineFunction(Module *M,
 }
 
 /// Emit a store-pair instruction for frame-setup.
-/// If Reg2 is AArch64::NoRegister, emit a gap to respect the alignment
-/// for unpaired registers.
+/// If Reg2 is AArch64::NoRegister, emit STR instead.
 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                       int Offset, bool IsPreDec) {
   assert(Reg1 != AArch64::NoRegister);
-  if (Reg2 == AArch64::NoRegister) {
-    assert(AArch64::GPR64RegClass.contains(Reg1));
-    Reg2 = Reg1;
-    Reg1 = AArch64::XZR;
-  }
+  const bool IsPaired = Reg2 != AArch64::NoRegister;
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
-  if (IsPreDec)
-    Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
-  else
-    Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
+  if (IsPreDec) {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
+    else
+      Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
+  } else {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
+    else
+      Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
+  }
+  // Fix the implicit scale for non-pair cases.
+  if (!IsPaired)
+    Offset *= 8;
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPreDec)
     MIB.addDef(AArch64::SP);
-  MIB.addReg(Reg2)
-      .addReg(Reg1)
+  if (IsPaired)
+    MIB.addReg(Reg2);
+  MIB.addReg(Reg1)
       .addReg(AArch64::SP)
       .addImm(Offset)
       .setMIFlag(MachineInstr::FrameSetup);
 }
 
 /// Emit a load-pair instruction for frame-destroy.
-/// If Reg2 is AArch64::NoRegister, respect the alignment for unpaired
-/// registers by reading the gap into XZR.
+/// If Reg2 is AArch64::NoRegister, emit LTR instead.
 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Pos,
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
                      int Offset, bool IsPostDec) {
   assert(Reg1 != AArch64::NoRegister);
-  if (Reg2 == AArch64::NoRegister) {
-    assert(AArch64::GPR64RegClass.contains(Reg1));
-    Reg2 = Reg1;
-    Reg1 = AArch64::XZR;
-  }
+  const bool IsPaired = Reg2 != AArch64::NoRegister;
   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
   unsigned Opc;
-  if (IsPostDec)
-    Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
-  else
-    Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
+  if (IsPostDec) {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
+    else
+      Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
+  } else {
+    if (IsFloat)
+      Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
+    else
+      Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
+  }
+  // Fix the implicit scale for non-pair cases.
+  if (!IsPaired)
+    Offset *= 8;
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPostDec)
     MIB.addDef(AArch64::SP);
-  MIB.addReg(Reg2, getDefRegState(true))
-      .addReg(Reg1, getDefRegState(true))
+  if (IsPaired)
+    MIB.addReg(Reg2, getDefRegState(true));
+  MIB.addReg(Reg1, getDefRegState(true))
       .addReg(AArch64::SP)
       .addImm(Offset)
       .setMIFlag(MachineInstr::FrameDestroy);

>From 24d840efcc8da3597bec215183d7907a13ed6245 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Mon, 9 Oct 2023 21:09:22 +0800
Subject: [PATCH 4/7] test for unpaired cases

---
 ...arm64-homogeneous-prolog-epilog-odd-csrs.ll | 18 ++++++++++++++++++
 .../AArch64/arm64-homogeneous-prolog-epilog.ll | 18 ------------------
 2 files changed, 18 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll

diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
new file mode 100644
index 000000000000000..a3ad0cbb980e495
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -homogeneous-prolog-epilog | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu  -homogeneous-prolog-epilog | FileCheck %s --check-prefixes=CHECK-LINUX
+
+declare void @bar(i32 %i)
+
+define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	str	x28, [sp, #-80]!
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
+; CHECK:	ldr	x28, [sp], #96
+
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG:
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG:
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
index e39375ac1e49e46..c414c7c2464f8be 100644
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog.ll
@@ -77,21 +77,3 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 define void @swift_async(i8* swiftasync %ctx) minsize "frame-pointer"="all" {
   ret void
 }
-
-declare void @bar(i32 %i)
-
-define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounwind minsize {
-  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
-  call void @bar(i32 %i)
-  ret void
-}
-
-; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
-; CHECK:	stp	x28, xzr, [sp, #-80]!
-; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
-; CHECK:	ldp	x28, xzr, [sp], #96
-
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_PROLOG_x19x20x22x23x24x25x26x27x28x30x29:
-; CHECK-LINUX:	stp	x29, xzr, [sp, #-8]!
-; CHECK-LINUX-LABEL: OUTLINED_FUNCTION_EPILOG_TAIL_x19x20x22x23x24x25x26x27x28x30x29:
-; CHECK-LINUX:	ldp	x29, xzr, [sp], #96

>From a08bd2117601b73adcb2fb0e036ac2ff7fa6aa76 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Tue, 10 Oct 2023 19:42:33 +0800
Subject: [PATCH 5/7] fix wrong scale for STRXui/LDRXui

---
 .../AArch64LowerHomogeneousPrologEpilog.cpp   | 22 ++++++++++++++-----
 ...rm64-homogeneous-prolog-epilog-odd-csrs.ll | 13 +++++++++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index 231c13ff3b47570..6f616b505b287b7 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -219,9 +219,14 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
     else
       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
   }
-  // Fix the implicit scale for non-pair cases.
-  if (!IsPaired)
-    Offset *= 8;
+  // The implicit scale for Offset is 8.
+  TypeSize Scale(0U, false);
+  unsigned Width;
+  int64_t MinOffset, MaxOffset;
+  bool Success =
+      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+  assert(Success && "Invalid Opcode");
+  Offset *= (8 / (int)Scale);
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPreDec)
@@ -256,9 +261,14 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
     else
       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
   }
-  // Fix the implicit scale for non-pair cases.
-  if (!IsPaired)
-    Offset *= 8;
+  // The implicit scale for Offset is 8.
+  TypeSize Scale(0U, false);
+  unsigned Width;
+  int64_t MinOffset, MaxOffset;
+  bool Success =
+      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
+  assert(Success && "Invalid Opcode");
+  Offset *= (8 / (int)Scale);
 
   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
   if (IsPostDec)
diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
index a3ad0cbb980e495..32ea93ecb1c5dad 100644
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
@@ -9,10 +9,23 @@ define void @odd_num_callee_saved_registers(ptr swifterror %error, i32 %i) nounw
   ret void
 }
 
+define void @odd_num_callee_saved_registers_with_fpr(ptr swifterror %error, i32 %i) nounwind minsize {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x20},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{d8},~{d9}"() nounwind
+  call void @bar(i32 %i)
+  ret void
+}
+
 ; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28:
 ; CHECK:	str	x28, [sp, #-80]!
 ; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28:
 ; CHECK:	ldr	x28, [sp], #96
 
+; CHECK-LABEL: _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x22x23x24x25x26x27x28d8d9:
+; CHECK:	stp	d9, d8, [sp, #-96]!
+; CHECK:	str	x28, [sp, #16]
+; CHECK-LABEL: _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x22x23x24x25x26x27x28d8d9
+; CHECK:	ldr	x28, [sp, #16]
+; CHECK:	ldp	d9, d8, [sp], #112
+
 ; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG:
 ; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG:

>From c139303840d48dae00bd9b2ce5361c4ee31e9b25 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Mon, 16 Oct 2023 09:42:31 +0800
Subject: [PATCH 6/7] fix bad test expectation for Linux

---
 .../AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
index 32ea93ecb1c5dad..3b90163e6d29531 100644
--- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-odd-csrs.ll
@@ -27,5 +27,5 @@ define void @odd_num_callee_saved_registers_with_fpr(ptr swifterror %error, i32
 ; CHECK:	ldr	x28, [sp, #16]
 ; CHECK:	ldp	d9, d8, [sp], #112
 
-; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG:
-; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG:
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_PROLOG
+; CHECK-LINUX-NOT: OUTLINED_FUNCTION_EPILOG

>From a04eee2d734a943522dd3e8354f294615ff5b8c6 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Mon, 16 Oct 2023 09:44:00 +0800
Subject: [PATCH 7/7] fix typo

---
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp                | 2 +-
 llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bbee2b0c8a7bff1..072f8398bd0d9c8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3060,7 +3060,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
     // If the function requires all the GP registers to save (SavedRegs),
     // and there are an odd number of GP CSRs at the same time (CSRegs),
     // PairedReg could be in a different register class from Reg, which would
-    // lead to an FPR (usually D8) accidentally being marked saved.
+    // lead to a FPR (usually D8) accidentally being marked saved.
     if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
       PairedReg = AArch64::NoRegister;
       HasUnpairedGPR64 = true;
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index 6f616b505b287b7..4ebe1c9e0e66033 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -240,7 +240,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
 }
 
 /// Emit a load-pair instruction for frame-destroy.
-/// If Reg2 is AArch64::NoRegister, emit LTR instead.
+/// If Reg2 is AArch64::NoRegister, emit LDR instead.
 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator Pos,
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,



More information about the llvm-commits mailing list