[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 09:30:45 PST 2025
================
@@ -164,6 +170,41 @@ class AMDGPUWaitSGPRHazards {
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
}
+ unsigned mergeMasks(unsigned Mask1, unsigned Mask2) { // Builds one mask from Mask1 and Mask2: per field, the smaller of the two decoded values is encoded.
+ unsigned Mask = 0xffff; // Starting value that each encodeField* call below is applied to in turn.
+ Mask = AMDGPU::DepCtr::encodeFieldSaSdst( // The same decode / std::min / encode pattern repeats for all seven fields.
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1), // NOTE(review): presumably a smaller field value is the stricter wait — confirm.
+ AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+ AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
+ return Mask; // Result after all seven encodeField* calls have been applied.
+ }
+
+ MachineInstr *getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
----------------
nhaehnle wrote:
Let's skip only debug instructions, so that they affect code generation as little as possible.
https://github.com/llvm/llvm-project/pull/128916
More information about the llvm-commits
mailing list