[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)

Ana Mihajlovic via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 02:44:45 PST 2025


================
@@ -164,6 +164,47 @@ class AMDGPUWaitSGPRHazards {
       BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
   }
 
+  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+    unsigned Mask = 0xffff;
+    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+                       AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
+    return Mask;
+  }
+
+  bool mergeSubsequentWaitAlus(MachineBasicBlock::instr_iterator &MI,
+                               unsigned Mask) {
+    auto MBB = MI->getParent();
+    if (MI != MBB->instr_begin()) {
----------------
mihajlovicana wrote:

Do you mean something like this?
`  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                               unsigned Mask) {
    auto MBB = MI->getParent();
    if (MI == MBB->instr_begin())
      return false;

    MachineBasicBlock::instr_iterator It = std::prev(MI);
    while (It != MBB->instr_begin() && It->isDebugInstr())
      --It;
    if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
        return true;
    }
    
    return false;
  }`

https://github.com/llvm/llvm-project/pull/128916


More information about the llvm-commits mailing list