[llvm] [AMDGPU] Generate more swaps (PR #184164)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 12:03:54 PST 2026
================
@@ -724,73 +726,79 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
const unsigned SearchLimit = 16;
unsigned Count = 0;
- bool KilledT = false;
+
+ MachineInstr *MovX = nullptr;
+ MachineInstr *InsertionPt = nullptr;
+ MachineInstr *MovY = nullptr;
+
for (auto Iter = std::next(MovT.getIterator()),
E = MovT.getParent()->instr_end();
- Iter != E && Count < SearchLimit && !KilledT; ++Iter) {
-
- MachineInstr *MovY = &*Iter;
- KilledT = MovY->killsRegister(T, TRI);
- if (MovY->isDebugInstr())
+ Iter != E && Count < SearchLimit; ++Iter) {
+ if (Iter->isDebugInstr())
continue;
++Count;
- if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
- MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
- MovY->getOpcode() != AMDGPU::COPY) ||
- !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
- MovY->getOperand(1).getSubReg() != Tsub)
- continue;
-
- Register Y = MovY->getOperand(0).getReg();
- unsigned Ysub = MovY->getOperand(0).getSubReg();
-
- if (!TRI->isVGPR(*MRI, Y))
- continue;
-
- MachineInstr *MovX = nullptr;
- for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
- I != IY; ++I) {
- if (I->isDebugInstr())
- continue;
- if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
- instModifiesReg(&*I, T, Tsub) ||
- (MovX && instModifiesReg(&*I, X, Xsub))) {
- MovX = nullptr;
- break;
- }
- if (!instReadsReg(&*I, Y, Ysub)) {
- if (!MovX && instModifiesReg(&*I, X, Xsub)) {
- MovX = nullptr;
- break;
- }
- continue;
+ if (instModifiesReg(&*Iter, T, Tsub))
+ return nullptr;
+
+ if (!MovX) {
+ // Search for mov x, y.
+ if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
+ Iter->getOpcode() == AMDGPU::COPY) &&
+ Iter->getOperand(0).getReg() == X &&
+ Iter->getOperand(0).getSubReg() == Xsub &&
+ Iter->getOperand(1).isReg()) {
+ MovX = &*Iter;
+ Y = MovX->getOperand(1).getReg();
+ Ysub = MovX->getOperand(1).getSubReg();
}
- if (MovX ||
- (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
- I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
- I->getOpcode() != AMDGPU::COPY) ||
- I->getOperand(0).getReg() != X ||
- I->getOperand(0).getSubReg() != Xsub) {
- MovX = nullptr;
+ } else {
+ // mov x, y has been found.
+ // Search for mov y, t.
+ if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
+ Iter->getOpcode() == AMDGPU::COPY) &&
----------------
arsenm wrote:
I'm wondering if we really should do this pre-RA at all.
https://github.com/llvm/llvm-project/pull/184164
More information about the llvm-commits mailing list