[llvm] [AMDGPU] Use 32-bit SGPR to save/restore SCC (PR #68367)

Sirish Pande via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 11 16:25:36 PDT 2023


https://github.com/srpande updated https://github.com/llvm/llvm-project/pull/68367

>From 144ee784651df652a54cb51e88abb14c8bda1c52 Mon Sep 17 00:00:00 2001
From: Sirish Pande <sirish.pande at amd.com>
Date: Tue, 3 Oct 2023 18:43:15 -0500
Subject: [PATCH] [AMDGPU] Use 32-bit SGPR to save/restore SCC.

SCC is a single bit in the 32-bit STATUS register. Unless the COPY's
source or destination is 64-bit, there is no need to use a 64-bit
register; doing so just ties up a register unnecessarily, which may
increase register pressure in later passes.
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp    | 29 ++++++++----
 llvm/test/CodeGen/AMDGPU/save_restore_scc.mir | 46 +++++++++++++++++++
 2 files changed, 66 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/save_restore_scc.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 60cd9d4c3c35a27..645b6b3b374e0ec 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1093,7 +1093,6 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
 }
 
 void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
-  bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
        ++BI) {
     MachineBasicBlock *MBB = &*BI;
@@ -1106,13 +1105,18 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
       Register SrcReg = MI.getOperand(1).getReg();
       Register DstReg = MI.getOperand(0).getReg();
       if (SrcReg == AMDGPU::SCC) {
+        const TargetRegisterClass *DstRC =
+            TRI->getRegClassForOperandReg(*MRI, MI.getOperand(0));
+        unsigned DstRegSize = TRI->getRegSizeInBits(*DstRC);
+        assert((DstRegSize == 64 || DstRegSize == 32) &&
+               "Expected SCC dst to be 64 or 32 bits");
+        bool IsDst32Bit = DstRegSize == 32;
         Register SCCCopy = MRI->createVirtualRegister(
-            TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+            IsDst32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
+        unsigned Opcode =
+            IsDst32Bit ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64;
         I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
-                    MI.getDebugLoc(),
-                    TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
-                                      : AMDGPU::S_CSELECT_B64),
-                    SCCCopy)
+                    MI.getDebugLoc(), TII->get(Opcode), SCCCopy)
                 .addImm(-1)
                 .addImm(0);
         I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
@@ -1122,9 +1126,16 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
         continue;
       }
       if (DstReg == AMDGPU::SCC) {
-        unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
-        Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-        Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+        const TargetRegisterClass *SrcRC =
+            TRI->getRegClassForOperandReg(*MRI, MI.getOperand(1));
+        unsigned SrcRegSize = TRI->getRegSizeInBits(*SrcRC);
+        assert((SrcRegSize == 64 || SrcRegSize == 32) &&
+               "Expected SCC src to be 64 or 32 bits");
+        bool IsSrc32Bit = SrcRegSize == 32;
+        unsigned Opcode = IsSrc32Bit ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+        Register Exec = IsSrc32Bit ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+        Register Tmp = MRI->createVirtualRegister(
+            IsSrc32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
         I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
                     MI.getDebugLoc(), TII->get(Opcode))
                 .addReg(Tmp, getDefRegState(true))
diff --git a/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
new file mode 100644
index 000000000000000..7ee7cf05b95911d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
@@ -0,0 +1,46 @@
+# RUN: llc -march=amdgcn -mcpu=gfx906  -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX906
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX1030
+
+---
+
+# GFX1030-LABEL: name: waterfall_kills_scc_gfx1030
+# GFX1030: %1:sreg_32 = S_CSELECT_B32 -1, 0, implicit $scc
+# GFX1030: %2:sreg_32 = S_AND_B32 %0, $exec_lo, implicit-def $scc
+
+name:            waterfall_kills_scc_gfx1030
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    %1:sreg_32 = COPY $scc
+  
+  bb.1:
+    successors: %bb.1(0x80000000), %bb.2(0x40000000)
+  
+    $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+    SI_WATERFALL_LOOP %bb.2, implicit $exec
+  
+  bb.2:
+    $scc = COPY %1
+...
+
+# GFX906-LABEL: name: waterfall_kills_scc_gfx906
+# GFX906: %1:sreg_64 = S_CSELECT_B64 -1, 0, implicit $scc
+# GFX906: %2:sreg_64 = S_AND_B64 %0, $exec, implicit-def $scc
+---
+name:            waterfall_kills_scc_gfx906
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    %1:sreg_64_xexec = COPY $scc
+  
+  bb.1:
+    successors: %bb.1(0x80000000), %bb.2(0x40000000)
+  
+    $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+    SI_WATERFALL_LOOP %bb.2, implicit $exec
+  
+  bb.2:
+    $scc = COPY %1
+...



More information about the llvm-commits mailing list