[llvm] [AMDGPU][CodeGen] Using MBB's liveIn check in tandem with MCRegAliasIterator in SILowerSGPRSpills (PR #129848)

Vikash Gupta via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 04:13:00 PDT 2025


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/129848

>From ff029b6cfc574eb79a91ba22dfe89f76fd0d5710 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Wed, 5 Mar 2025 06:20:45 +0000
Subject: [PATCH 1/3] [CodeGen] Utilizing register units based liveIns tracking

Currently, the machine basicblock does not fully utilizes the
laneBitmask associated with physReg liveIns to check for the
precise liveness. Conservatively, it acts fully correct now, only
if all liveIns check for MBB is in form it defines it for itself.

So, now with the use of register units tracking for MBB's liveIns
, its possible to track & check liveness for all sorts of physRegs.
---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 ++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 34 ++++++++++++++++---
 llvm/test/CodeGen/ARM/aes-erratum-fix.ll      |  8 ++---
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 0d105488ee2db..88a1821a55e56 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H
 #define LLVM_CODEGEN_MACHINEBASICBLOCK_H
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SparseBitVector.h"
@@ -158,6 +159,7 @@ class MachineBasicBlock
 
   MachineFunction *xParent;
   Instructions Insts;
+  const TargetRegisterInfo *TRI;
 
   /// Keep track of the predecessor / successor basic blocks.
   SmallVector<MachineBasicBlock *, 4> Predecessors;
@@ -177,6 +179,10 @@ class MachineBasicBlock
   using LiveInVector = std::vector<RegisterMaskPair>;
   LiveInVector LiveIns;
 
+  /// Keeps track of live register units for those physical registers which
+  /// are livein of the basicblock.
+  BitVector LiveInRegUnits;
+
   /// Alignment of the basic block. One if the basic block does not need to be
   /// aligned.
   Align Alignment;
@@ -458,11 +464,17 @@ class MachineBasicBlock
   void addLiveIn(MCRegister PhysReg,
                  LaneBitmask LaneMask = LaneBitmask::getAll()) {
     LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
+    addLiveInRegUnit(PhysReg, LaneMask);
   }
   void addLiveIn(const RegisterMaskPair &RegMaskPair) {
     LiveIns.push_back(RegMaskPair);
+    addLiveInRegUnit(RegMaskPair.PhysReg, RegMaskPair.LaneMask);
   }
 
+  // Sets the register units for Reg based on the LaneMask in the
+  // LiveInRegUnits.
+  void addLiveInRegUnit(MCRegister Reg, LaneBitmask LaneMask);
+
   /// Sorts and uniques the LiveIns vector. It can be significantly faster to do
   /// this than repeatedly calling isLiveIn before calling addLiveIn for every
   /// LiveIn insertion.
@@ -484,6 +496,9 @@ class MachineBasicBlock
   void removeLiveIn(MCRegister Reg,
                     LaneBitmask LaneMask = LaneBitmask::getAll());
 
+  /// Resets the register units from LiveInRegUnits for the specified regsiters.
+  void removeLiveInRegUnit(MCRegister Reg);
+
   /// Return true if the specified register is in the live in set.
   bool isLiveIn(MCRegister Reg,
                 LaneBitmask LaneMask = LaneBitmask::getAll()) const;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index fa6b53455f145..1080b973bc381 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -51,10 +52,12 @@ static cl::opt<bool> PrintSlotIndexes(
     cl::init(true), cl::Hidden);
 
 MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
-    : BB(B), Number(-1), xParent(&MF) {
+    : BB(B), Number(-1), xParent(&MF),
+      TRI(MF.getSubtarget().getRegisterInfo()) {
   Insts.Parent = this;
   if (B)
     IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
+  LiveInRegUnits.resize(TRI->getNumRegUnits());
 }
 
 MachineBasicBlock::~MachineBasicBlock() = default;
@@ -597,6 +600,14 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS,
   printName(OS, 0);
 }
 
+void MachineBasicBlock::addLiveInRegUnit(MCRegister Reg, LaneBitmask LaneMask) {
+  for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
+    LaneBitmask UnitMask = (*Unit).second;
+    if ((UnitMask & LaneMask).any())
+      LiveInRegUnits.set((*Unit).first);
+  }
+}
+
 void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
   LiveInVector::iterator I = find_if(
       LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
@@ -604,21 +615,32 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
     return;
 
   I->LaneMask &= ~LaneMask;
-  if (I->LaneMask.none())
+  if (I->LaneMask.none()) {
     LiveIns.erase(I);
+    removeLiveInRegUnit(I->PhysReg);
+  }
 }
 
 MachineBasicBlock::livein_iterator
 MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
   // Get non-const version of iterator.
   LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin());
+  removeLiveInRegUnit(LI->PhysReg);
   return LiveIns.erase(LI);
 }
 
+void MachineBasicBlock::removeLiveInRegUnit(MCRegister Reg) {
+  for (MCRegUnit Unit : TRI->regunits(Reg))
+    LiveInRegUnits.reset(Unit);
+}
+
 bool MachineBasicBlock::isLiveIn(MCRegister Reg, LaneBitmask LaneMask) const {
-  livein_iterator I = find_if(
-      LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
-  return I != livein_end() && (I->LaneMask & LaneMask).any();
+  for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
+    LaneBitmask UnitMask = (*Unit).second;
+    if ((UnitMask & LaneMask).any() && LiveInRegUnits.test((*Unit).first))
+      return true;
+  }
+  return false;
 }
 
 void MachineBasicBlock::sortUniqueLiveIns() {
@@ -1751,12 +1773,14 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
 
 void MachineBasicBlock::clearLiveIns() {
   LiveIns.clear();
+  LiveInRegUnits.reset();
 }
 
 void MachineBasicBlock::clearLiveIns(
     std::vector<RegisterMaskPair> &OldLiveIns) {
   assert(OldLiveIns.empty() && "Vector must be empty");
   std::swap(LiveIns, OldLiveIns);
+  LiveInRegUnits.reset();
 }
 
 MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index 82f5bfd02a56e..e1361d6efa780 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -68,8 +68,8 @@ define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf16
-; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
+; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    aese.8 q8, q0
 ; CHECK-FIX-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -89,8 +89,8 @@ define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf32
-; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
+; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    aese.8 q8, q0
 ; CHECK-FIX-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -2222,8 +2222,8 @@ define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf16
-; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
+; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    aesd.8 q8, q0
 ; CHECK-FIX-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -2243,8 +2243,8 @@ define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf32
-; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
+; CHECK-FIX-NEXT:    vorr q0, q0, q0
 ; CHECK-FIX-NEXT:    aesd.8 q8, q0
 ; CHECK-FIX-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]

>From 6a7f07d0afadd6f891b8e24647a2fc8d1294e8e3 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Wed, 5 Mar 2025 08:02:38 +0000
Subject: [PATCH 2/3] [AMDGPU][CodeGen] Utilized MBB's liveIn check in
 SILowerSGPRSpills

This patch replaces use of MachineRegisterInfo's liveIn check with the
machine basicBlock's liveIn. As the MRI's liveIn is inconsistent with
the entry MBB liveIns, when it comes to the machine verifier checks
---
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp  |  2 +-
 .../spill-partial-csr-sgpr-live-ins.mir       | 36 +++++++++----------
 .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir     | 32 ++++++++---------
 3 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index d27c523425feb..ad3dea7af5b71 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -128,7 +128,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
       // incoming register value, so don't kill at the spill point. This happens
       // since we pass some special inputs (workgroup IDs) in the callee saved
       // range.
-      const bool IsLiveIn = MRI.isLiveIn(Reg);
+      const bool IsLiveIn = SaveBlock.isLiveIn(Reg);
       TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                               RC, TRI, Register());
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir
index ab960a7084528..24c631ce5e15f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir
@@ -1,25 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills %s -o /dev/null 2>&1 | FileCheck -check-prefix=VERIFIER %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s
 
-# FIXME : Currently, MRI's liveIn check for registers does not take the corresponding live-in's sub-registers into account. As a result
-# in SILowerSGPRSpills, the SubReg spill gets marked KILLED even though its SuperReg is in the function Live-ins. This causes machine
-# verifier to now fail at direct usage of that SubReg, which intially should not be any problem before adding spill.
-
-# VERIFIER: After SI lower SGPR spill instructions
-
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
-# VERIFIER: - instruction: S_NOP 0, implicit $sgpr50
-# VERIFIER-NEXT: - operand 1:   implicit $sgpr50
-
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
-# VERIFIER: - instruction: S_NOP 0, implicit $sgpr52
-# VERIFIER-NEXT: - operand 1:   implicit $sgpr52
-
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
-# VERIFIER: - instruction: S_NOP 0, implicit $sgpr55
-# VERIFIER-NEXT: - operand 1:   implicit $sgpr55
-
-# VERIFIER: LLVM ERROR: Found 3 machine code errors. 
 ---
 name: spill_partial_live_csr_sgpr_test
 tracksRegLiveness: true
@@ -31,6 +12,21 @@ body:             |
   bb.0:
     liveins: $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55
 
+    ; CHECK-LABEL: name: spill_partial_live_csr_sgpr_test
+    ; CHECK: liveins: $sgpr50, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $vgpr63, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63
+    ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr52, 1, $vgpr63
+    ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr53, 2, $vgpr63
+    ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr54, 3, $vgpr63
+    ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr55, 4, $vgpr63
+    ; CHECK-NEXT: S_NOP 0, implicit $sgpr50
+    ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0
+    ; CHECK-NEXT: S_NOP 0, implicit $sgpr52
+    ; CHECK-NEXT: $sgpr52_sgpr53 = S_MOV_B64 0
+    ; CHECK-NEXT: S_NOP 0, implicit $sgpr55
+    ; CHECK-NEXT: $sgpr54_sgpr55 = S_MOV_B64 0
+    ; CHECK-NEXT: $sgpr56 = S_MOV_B32 0
     S_NOP 0, implicit $sgpr50
     $sgpr50 = S_MOV_B32 0
     S_NOP 0, implicit $sgpr52
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index 7f4f9489ea4b7..fa3fd3bc6da5b 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -55,22 +55,22 @@ body:             |
     ; GCN-LABEL: name: sgpr_spill_lane_crossover
     ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 8, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 9, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 10, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 11, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 12, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 13, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 14, $vgpr63
-    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 15, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr80, 8, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr81, 9, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr82, 10, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr83, 11, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr84, 12, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr85, 13, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr86, 14, $vgpr63
+    ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr87, 15, $vgpr63
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]

>From 026863007a49a8b950011b561f8521f54780a28c Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Fri, 7 Mar 2025 08:06:38 +0000
Subject: [PATCH 3/3] Utilizing MCRegAliasIterator to check for all possible
 liveIn alias in SILowerSPGRSpills.

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 --------
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 34 +++----------------
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp  | 10 ++++--
 llvm/test/CodeGen/ARM/aes-erratum-fix.ll      |  8 ++---
 4 files changed, 16 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 88a1821a55e56..0d105488ee2db 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H
 #define LLVM_CODEGEN_MACHINEBASICBLOCK_H
 
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SparseBitVector.h"
@@ -159,7 +158,6 @@ class MachineBasicBlock
 
   MachineFunction *xParent;
   Instructions Insts;
-  const TargetRegisterInfo *TRI;
 
   /// Keep track of the predecessor / successor basic blocks.
   SmallVector<MachineBasicBlock *, 4> Predecessors;
@@ -179,10 +177,6 @@ class MachineBasicBlock
   using LiveInVector = std::vector<RegisterMaskPair>;
   LiveInVector LiveIns;
 
-  /// Keeps track of live register units for those physical registers which
-  /// are livein of the basicblock.
-  BitVector LiveInRegUnits;
-
   /// Alignment of the basic block. One if the basic block does not need to be
   /// aligned.
   Align Alignment;
@@ -464,17 +458,11 @@ class MachineBasicBlock
   void addLiveIn(MCRegister PhysReg,
                  LaneBitmask LaneMask = LaneBitmask::getAll()) {
     LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
-    addLiveInRegUnit(PhysReg, LaneMask);
   }
   void addLiveIn(const RegisterMaskPair &RegMaskPair) {
     LiveIns.push_back(RegMaskPair);
-    addLiveInRegUnit(RegMaskPair.PhysReg, RegMaskPair.LaneMask);
   }
 
-  // Sets the register units for Reg based on the LaneMask in the
-  // LiveInRegUnits.
-  void addLiveInRegUnit(MCRegister Reg, LaneBitmask LaneMask);
-
   /// Sorts and uniques the LiveIns vector. It can be significantly faster to do
   /// this than repeatedly calling isLiveIn before calling addLiveIn for every
   /// LiveIn insertion.
@@ -496,9 +484,6 @@ class MachineBasicBlock
   void removeLiveIn(MCRegister Reg,
                     LaneBitmask LaneMask = LaneBitmask::getAll());
 
-  /// Resets the register units from LiveInRegUnits for the specified regsiters.
-  void removeLiveInRegUnit(MCRegister Reg);
-
   /// Return true if the specified register is in the live in set.
   bool isLiveIn(MCRegister Reg,
                 LaneBitmask LaneMask = LaneBitmask::getAll()) const;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 1080b973bc381..fa6b53455f145 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -35,7 +35,6 @@
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -52,12 +51,10 @@ static cl::opt<bool> PrintSlotIndexes(
     cl::init(true), cl::Hidden);
 
 MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
-    : BB(B), Number(-1), xParent(&MF),
-      TRI(MF.getSubtarget().getRegisterInfo()) {
+    : BB(B), Number(-1), xParent(&MF) {
   Insts.Parent = this;
   if (B)
     IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
-  LiveInRegUnits.resize(TRI->getNumRegUnits());
 }
 
 MachineBasicBlock::~MachineBasicBlock() = default;
@@ -600,14 +597,6 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS,
   printName(OS, 0);
 }
 
-void MachineBasicBlock::addLiveInRegUnit(MCRegister Reg, LaneBitmask LaneMask) {
-  for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
-    LaneBitmask UnitMask = (*Unit).second;
-    if ((UnitMask & LaneMask).any())
-      LiveInRegUnits.set((*Unit).first);
-  }
-}
-
 void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
   LiveInVector::iterator I = find_if(
       LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
@@ -615,32 +604,21 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
     return;
 
   I->LaneMask &= ~LaneMask;
-  if (I->LaneMask.none()) {
+  if (I->LaneMask.none())
     LiveIns.erase(I);
-    removeLiveInRegUnit(I->PhysReg);
-  }
 }
 
 MachineBasicBlock::livein_iterator
 MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
   // Get non-const version of iterator.
   LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin());
-  removeLiveInRegUnit(LI->PhysReg);
   return LiveIns.erase(LI);
 }
 
-void MachineBasicBlock::removeLiveInRegUnit(MCRegister Reg) {
-  for (MCRegUnit Unit : TRI->regunits(Reg))
-    LiveInRegUnits.reset(Unit);
-}
-
 bool MachineBasicBlock::isLiveIn(MCRegister Reg, LaneBitmask LaneMask) const {
-  for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
-    LaneBitmask UnitMask = (*Unit).second;
-    if ((UnitMask & LaneMask).any() && LiveInRegUnits.test((*Unit).first))
-      return true;
-  }
-  return false;
+  livein_iterator I = find_if(
+      LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+  return I != livein_end() && (I->LaneMask & LaneMask).any();
 }
 
 void MachineBasicBlock::sortUniqueLiveIns() {
@@ -1773,14 +1751,12 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
 
 void MachineBasicBlock::clearLiveIns() {
   LiveIns.clear();
-  LiveInRegUnits.reset();
 }
 
 void MachineBasicBlock::clearLiveIns(
     std::vector<RegisterMaskPair> &OldLiveIns) {
   assert(OldLiveIns.empty() && "Vector must be empty");
   std::swap(LiveIns, OldLiveIns);
-  LiveInRegUnits.reset();
 }
 
 MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index ad3dea7af5b71..dc84db9a070ef 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -114,8 +114,6 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
 
   MachineBasicBlock::iterator I = SaveBlock.begin();
   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
-    const MachineRegisterInfo &MRI = MF.getRegInfo();
-
     for (const CalleeSavedInfo &CS : CSI) {
       // Insert the spill to the stack frame.
       MCRegister Reg = CS.getReg();
@@ -128,7 +126,13 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
       // incoming register value, so don't kill at the spill point. This happens
       // since we pass some special inputs (workgroup IDs) in the callee saved
       // range.
-      const bool IsLiveIn = SaveBlock.isLiveIn(Reg);
+      bool IsLiveIn = false;
+      for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
+        if (SaveBlock.isLiveIn(*R)) {
+          IsLiveIn = true;
+          break;
+        }
+      }
       TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                               RC, TRI, Register());
 
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index e1361d6efa780..82f5bfd02a56e 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -68,8 +68,8 @@ define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf16
-; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    vorr q0, q0, q0
+; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    aese.8 q8, q0
 ; CHECK-FIX-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -89,8 +89,8 @@ define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf32
-; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    vorr q0, q0, q0
+; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    aese.8 q8, q0
 ; CHECK-FIX-NEXT:    aesmc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -2222,8 +2222,8 @@ define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf16
-; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    vorr q0, q0, q0
+; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    aesd.8 q8, q0
 ; CHECK-FIX-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
@@ -2243,8 +2243,8 @@ define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
 ; CHECK-FIX-NEXT:    push {r4, lr}
 ; CHECK-FIX-NEXT:    mov r4, r0
 ; CHECK-FIX-NEXT:    bl get_inputf32
-; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    vorr q0, q0, q0
+; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
 ; CHECK-FIX-NEXT:    aesd.8 q8, q0
 ; CHECK-FIX-NEXT:    aesimc.8 q8, q8
 ; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]



More information about the llvm-commits mailing list