[llvm] WIP. [AMDGPU] Convert s_cselect to v_cndmask if its single user isVALU (PR #113705)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 08:58:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (alex-t)
<details>
<summary>Changes</summary>
This is a draft that aims to convert s_cselect_b32 to v_cndmask_b32 when the select's condition operand is a copy to SCC and the only user of the select result is a VALU instruction.
---
Full diff: https://github.com/llvm/llvm-project/pull/113705.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (+30)
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 654ae412f39c13..192277caaf4ab0 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1107,6 +1107,7 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
for (MachineBasicBlock &MBB : MF) {
+ SIInstrWorklist Worklist;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
++I) {
MachineInstr &MI = *I;
@@ -1132,6 +1133,34 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
continue;
}
if (DstReg == AMDGPU::SCC) {
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator E = MBB.end();
+ for (; J != E; ++J) {
+ if (J->definesRegister(DstReg, TRI))
+ break;
+ if (J->readsRegister(DstReg, TRI) &&
+ J->getOpcode() == AMDGPU::S_CSELECT_B32) {
+ Register SelectResult = J->getOperand(0).getReg();
+ if (MRI->hasOneUse(SelectResult)) {
+ MachineInstr *User = &*MRI->use_instr_begin(SelectResult);
+ if (User->isCopy()) {
+ const TargetRegisterClass *SrcRC =
+ TRI->getRegClassForReg(*MRI, SelectResult);
+ const TargetRegisterClass *DstRC =
+ TRI->getRegClassForOperandReg(*MRI, User->getOperand(0));
+ if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+ Worklist.insert(&*J);
+ }
+ }
+ }
+ }
+ }
+ if (!Worklist.empty()) {
+ TII->moveToVALU(Worklist, MDT);
+ I = std::next(I);
+ continue;
+ }
+
unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
@@ -1140,6 +1169,7 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
.addReg(Tmp, getDefRegState(true))
.addReg(SrcReg)
.addReg(Exec);
+
MI.eraseFromParent();
}
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 9da27a7c7ee7d6..71df0cf1bd17d8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1907,9 +1907,6 @@ let AddedComplexity = 20 in {
(S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
>;
- // TODO: The predicate should not be necessary, but enabling this pattern for
- // all subtargets generates worse code in some cases.
- let OtherPredicates = [HasPseudoScalarTrans] in
def : GCNPat<
(f32 (UniformSelect f32:$src0, f32:$src1)),
(S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
``````````
</details>
https://github.com/llvm/llvm-project/pull/113705
More information about the llvm-commits
mailing list