[PATCH] D71386: [AMDGPU] Remove unnessary v_mov from a register to itself in WQM lowering.
Michael Bedy via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 19:37:31 PST 2019
mjbedy created this revision.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
- SI Whole Quad Mode phase is replacing WQM pseudo instructions with v_mov instructions.
While this is necessary for the special handling of moving results out of WWM live ranges,
it is not necessary for WQM live ranges. The result is a v_mov from a register to itself after every
WQM operation. This change uses a COPY psuedo in these cases, which allows the register
allocator to coalesce the moves away.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D71386
Files:
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/test/CodeGen/AMDGPU/wqm.ll
Index: llvm/test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.ll
+++ llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -117,6 +117,9 @@
;CHECK: buffer_load_dword
;CHECK: buffer_load_dword
;CHECK: v_add_f32_e32
+; WQM was inserting an unecessary v_mov to self after the v_add. Make sure this
+; does not happen - the v_add should write the return reg directly.
+;CHECK-NOT: v_mov_b32_e32
define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) {
main_body:
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -156,6 +156,7 @@
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
void printInfo();
@@ -352,7 +353,7 @@
// inactive lanes.
markInstructionUses(MI, StateWWM, Worklist);
GlobalFlags |= StateWWM;
- LowerToCopyInstrs.push_back(&MI);
+ LowerToMovInstrs.push_back(&MI);
continue;
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
@@ -852,7 +853,7 @@
}
void SIWholeQuadMode::lowerCopyInstrs() {
- for (MachineInstr *MI : LowerToCopyInstrs) {
+ for (MachineInstr *MI : LowerToMovInstrs) {
for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
MI->RemoveOperand(i);
@@ -872,6 +873,12 @@
MI->setDesc(TII->get(AMDGPU::COPY));
}
}
+ for (MachineInstr *MI : LowerToCopyInstrs) {
+ for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
+ MI->RemoveOperand(i);
+
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
@@ -879,6 +886,7 @@
Blocks.clear();
LiveMaskQueries.clear();
LowerToCopyInstrs.clear();
+ LowerToMovInstrs.clear();
CallingConv = MF.getFunction().getCallingConv();
ST = &MF.getSubtarget<GCNSubtarget>();
@@ -893,7 +901,7 @@
unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(Exec);
- if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty())
+ if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty() && LowerToMovInstrs.empty())
return !LiveMaskQueries.empty();
} else {
// Store a copy of the original live mask when required
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71386.233487.patch
Type: text/x-patch
Size: 2765 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191212/806836ea/attachment.bin>
More information about the llvm-commits
mailing list