[llvm] [AMDGPU] Use 32-bit SGPR to save/restore SCC (PR #68367)
Sirish Pande via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 11 16:25:36 PDT 2023
https://github.com/srpande updated https://github.com/llvm/llvm-project/pull/68367
>From 144ee784651df652a54cb51e88abb14c8bda1c52 Mon Sep 17 00:00:00 2001
From: Sirish Pande <sirish.pande at amd.com>
Date: Tue, 3 Oct 2023 18:43:15 -0500
Subject: [PATCH] [AMDGPU] Use 32-bit SGPR to save/restore SCC.
SCC is a bit in the 32-bit STATUS register. Unless the COPY's source or
destination is 64-bit, there is no need to use a 64-bit register.
Using one anyway just ties up a register unnecessarily,
which may increase register pressure in later passes.
---
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 29 ++++++++----
llvm/test/CodeGen/AMDGPU/save_restore_scc.mir | 46 +++++++++++++++++++
2 files changed, 66 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 60cd9d4c3c35a27..645b6b3b374e0ec 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1093,7 +1093,6 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
}
void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
- bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
++BI) {
MachineBasicBlock *MBB = &*BI;
@@ -1106,13 +1105,18 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
if (SrcReg == AMDGPU::SCC) {
+ const TargetRegisterClass *DstRC =
+ TRI->getRegClassForOperandReg(*MRI, MI.getOperand(0));
+ unsigned DstRegSize = TRI->getRegSizeInBits(*DstRC);
+ assert((DstRegSize == 64 || DstRegSize == 32) &&
+ "Expected SCC dst to be 64 or 32 bits");
+ bool IsDst32Bit = DstRegSize == 32;
Register SCCCopy = MRI->createVirtualRegister(
- TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+ IsDst32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
+ unsigned Opcode =
+ IsDst32Bit ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64;
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
- MI.getDebugLoc(),
- TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64),
- SCCCopy)
+ MI.getDebugLoc(), TII->get(Opcode), SCCCopy)
.addImm(-1)
.addImm(0);
I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
@@ -1122,9 +1126,16 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
continue;
}
if (DstReg == AMDGPU::SCC) {
- unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+ const TargetRegisterClass *SrcRC =
+ TRI->getRegClassForOperandReg(*MRI, MI.getOperand(1));
+ unsigned SrcRegSize = TRI->getRegSizeInBits(*SrcRC);
+ assert((SrcRegSize == 64 || SrcRegSize == 32) &&
+ "Expected SCC src to be 64 or 32 bits");
+ bool IsSrc32Bit = SrcRegSize == 32;
+ unsigned Opcode = IsSrc32Bit ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ Register Exec = IsSrc32Bit ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ Register Tmp = MRI->createVirtualRegister(
+ IsSrc32Bit ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass);
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
MI.getDebugLoc(), TII->get(Opcode))
.addReg(Tmp, getDefRegState(true))
diff --git a/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
new file mode 100644
index 000000000000000..7ee7cf05b95911d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/save_restore_scc.mir
@@ -0,0 +1,46 @@
+# RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX906
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GFX1030
+
+---
+
+# GFX1030-LABEL: name: waterfall_kills_scc_gfx1030
+# GFX1030: %1:sreg_32 = S_CSELECT_B32 -1, 0, implicit $scc
+# GFX1030: %2:sreg_32 = S_AND_B32 %0, $exec_lo, implicit-def $scc
+
+name: waterfall_kills_scc_gfx1030
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+
+ %1:sreg_32 = COPY $scc
+
+ bb.1:
+ successors: %bb.1(0x80000000), %bb.2(0x40000000)
+
+ $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+ SI_WATERFALL_LOOP %bb.2, implicit $exec
+
+ bb.2:
+ $scc = COPY %1
+...
+
+# GFX906-LABEL: name: waterfall_kills_scc_gfx906
+# GFX906: %1:sreg_64 = S_CSELECT_B64 -1, 0, implicit $scc
+# GFX906: %2:sreg_64 = S_AND_B64 %0, $exec, implicit-def $scc
+---
+name: waterfall_kills_scc_gfx906
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+
+ %1:sreg_64_xexec = COPY $scc
+
+ bb.1:
+ successors: %bb.1(0x80000000), %bb.2(0x40000000)
+
+ $exec = S_XOR_B64_term $exec, -1, implicit-def $scc
+ SI_WATERFALL_LOOP %bb.2, implicit $exec
+
+ bb.2:
+ $scc = COPY %1
+...
More information about the llvm-commits
mailing list