[llvm] 9827806 - AMDGPU: Limit the search in finding the instruction pattern for v_swap generation.
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 16:41:13 PST 2020
Author: Changpeng Fang
Date: 2020-02-06T16:40:21-08:00
New Revision: 982780648124243131c6617c0d97fc1cb02d4e75
URL: https://github.com/llvm/llvm-project/commit/982780648124243131c6617c0d97fc1cb02d4e75
DIFF: https://github.com/llvm/llvm-project/commit/982780648124243131c6617c0d97fc1cb02d4e75.diff
LOG: AMDGPU: Limit the search in finding the instruction pattern for v_swap generation.
Summary:
Current implementation of matchSwap in SIShrinkInstructions searches the entire
use_nodbg_operands set to find the possible pattern to generate v_swap instruction.
This approach leads to O(N^3) compile time for SIShrinkInstructions.
But in reality, the matching pattern only exists within nearby instructions in the
same basic block. This work limits the search to a maximum of 16 instructions, and has
a linear compile time consumption.
Reviewers:
rampitec, arsenm
Differential Revision: https://reviews.llvm.org/D74180
Added:
Modified:
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 3986ca6dfa81..159913d2bed9 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -472,26 +472,29 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;
- for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
- if (YTop.getSubReg() != Tsub)
+ const unsigned SearchLimit = 16;
+ unsigned Count = 0;
+ for (auto Iter = std::next(MovT.getIterator()),
+ E = MovT.getParent()->instr_end();
+ Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+
+ MachineInstr *MovY = &*Iter;
+ if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY->getOpcode() != AMDGPU::COPY) ||
+ MovY->getOperand(1).getSubReg() != Tsub ||
+ MovY->getOperand(1).getReg() != T)
continue;
- MachineInstr &MovY = *YTop.getParent();
- if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
- MovY.getOpcode() != AMDGPU::COPY) ||
- MovY.getOperand(1).getSubReg() != Tsub)
- continue;
-
- Register Y = MovY.getOperand(0).getReg();
- unsigned Ysub = MovY.getOperand(0).getSubReg();
+ Register Y = MovY->getOperand(0).getReg();
+ unsigned Ysub = MovY->getOperand(0).getSubReg();
- if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ if (!TRI.isVGPR(MRI, Y))
continue;
MachineInstr *MovX = nullptr;
- auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
- for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
- if (instReadsReg(&*I, X, Xsub, TRI) ||
+ for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
+ I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
instModifiesReg(&*I, Y, Ysub, TRI) ||
instModifiesReg(&*I, T, Tsub, TRI) ||
(MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
@@ -516,7 +519,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = &*I;
}
- if (!MovX || I == E)
+ if (!MovX)
continue;
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
@@ -533,7 +536,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
}
MovX->eraseFromParent();
- MovY.eraseFromParent();
+ MovY->eraseFromParent();
MachineInstr *Next = &*std::next(MovT.getIterator());
if (MRI.use_nodbg_empty(T))
MovT.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
index 6e3aaa98b1f0..9f36e0b5d685 100644
--- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
@@ -562,3 +562,113 @@ body: |
%1.sub0 = COPY %2.sub0
S_ENDPGM 0
...
+
+# GCN-LABEL: name: swap_exact_max_insns_apart
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %3:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
+# GCN-NEXT: S_ENDPGM 0
+
+---
+name: swap_exact_max_insns_apart
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %3 = IMPLICIT_DEF
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %0 = COPY %1
+ %1 = COPY %2
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: swap_too_far
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %3:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %4:vgpr_32 = COPY %3
+# GCN-NEXT: %3:vgpr_32 = COPY %4
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM 0
+
+---
+name: swap_too_far
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %3 = IMPLICIT_DEF
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %4 = COPY %3
+ %3 = COPY %4
+ %0 = COPY %1
+ %1 = COPY %2
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list