[llvm] r309766 - [AMDGPU] Turn s_and_saveexec_b64 into s_and_b64 if result is unused

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 16:44:35 PDT 2017


Author: rampitec
Date: Tue Aug  1 16:44:35 2017
New Revision: 309766

URL: http://llvm.org/viewvc/llvm-project?rev=309766&view=rev
Log:
[AMDGPU] Turn s_and_saveexec_b64 into s_and_b64 if result is unused

With SI_END_CF elimination for some nested control flow, we can now
eliminate the saved exec register completely by turning the saveexec
version of an instruction into just a logical instruction.

Differential Revision: https://reviews.llvm.org/D36007

Added:
    llvm/trunk/test/CodeGen/AMDGPU/reduce-saveexec.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
    llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
    llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp?rev=309766&r1=309765&r2=309766&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp Tue Aug  1 16:44:35 2017
@@ -87,6 +87,30 @@ static unsigned isCopyToExec(const Machi
   return AMDGPU::NoRegister;
 }
 
+/// If \p MI is one of the S_*_B64 logical opcodes below and reads EXEC as a
+/// source, return its destination register; otherwise AMDGPU::NoRegister.
+static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::S_AND_B64:
+  case AMDGPU::S_OR_B64:
+  case AMDGPU::S_XOR_B64:
+  case AMDGPU::S_ANDN2_B64:
+  case AMDGPU::S_ORN2_B64:
+  case AMDGPU::S_NAND_B64:
+  case AMDGPU::S_NOR_B64:
+  case AMDGPU::S_XNOR_B64: {
+    const MachineOperand &Src1 = MI.getOperand(1); // First source operand.
+    if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
+      return MI.getOperand(0).getReg(); // Operand 0 is the result register.
+    const MachineOperand &Src2 = MI.getOperand(2); // Second source operand.
+    if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
+      return MI.getOperand(0).getReg();
+  } // Neither source is EXEC: leave the switch and return NoRegister below.
+  }
+
+  return AMDGPU::NoRegister;
+}
+
 static unsigned getSaveExecOp(unsigned Opc) {
   switch (Opc) {
   case AMDGPU::S_AND_B64:
@@ -209,8 +233,24 @@ bool SIOptimizeExecMasking::runOnMachine
     // Scan backwards to find the def.
     auto CopyToExecInst = &*I;
     auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
-    if (CopyFromExecInst == E)
+    if (CopyFromExecInst == E) {
+      auto PrepareExecInst = std::next(I);
+      if (PrepareExecInst == E)
+        continue;
+      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
+      if (CopyToExecInst->getOperand(1).isKill() &&
+          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
+        DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
+
+        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+
+        DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
+
+        CopyToExecInst->eraseFromParent();
+      }
+
       continue;
+    }
 
     if (isLiveOut(MBB, CopyToExec)) {
       // The copied register is live out and has a second use in another block.

Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp?rev=309766&r1=309765&r2=309766&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Tue Aug  1 16:44:35 2017
@@ -147,6 +147,30 @@ bool SIOptimizeExecMaskingPreRA::runOnMa
     }
 
     Changed = true;
+
+    // If the only use of saved exec in the removed instruction is S_AND_B64
+    // fold the copy now.
+    auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
+    if (!SaveExec || !SaveExec->isFullCopy())
+      continue;
+
+    unsigned SavedExec = SaveExec->getOperand(0).getReg();
+    bool SafeToReplace = true;
+    for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
+      if (U.getParent() != SaveExec->getParent()) {
+        SafeToReplace = false;
+        break;
+      }
+
+      DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
+    }
+
+    if (SafeToReplace) {
+      LIS->RemoveMachineInstrFromMaps(*SaveExec);
+      SaveExec->eraseFromParent();
+      MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
+      LIS->removeInterval(SavedExec);
+    }
   }
 
   if (Changed) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=309766&r1=309765&r2=309766&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Tue Aug  1 16:44:35 2017
@@ -4,7 +4,7 @@
 ; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
 ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
 ; GCN-NEXT: s_cbranch_execz [[ENDIF]]
-; GCN:      s_and_saveexec_b64
+; GCN:      s_and_b64 exec, exec, vcc
 ; GCN-NEXT: ; mask branch [[ENDIF]]
 ; GCN-NEXT: {{^BB[0-9_]+}}:
 ; GCN:      store_dword

Added: llvm/trunk/test/CodeGen/AMDGPU/reduce-saveexec.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduce-saveexec.mir?rev=309766&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduce-saveexec.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduce-saveexec.mir Tue Aug  1 16:44:35 2017
@@ -0,0 +1,147 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: reduce_and_saveexec
+# GCN:      %exec = S_AND_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_and_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_and_saveexec_commuted
+# GCN:      %exec = S_AND_B64 killed %vcc, %exec
+# GCN-NEXT: S_ENDPGM
+name: reduce_and_saveexec_commuted
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_AND_B64 killed %vcc, %exec, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_and_saveexec_liveout
+# GCN:      %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc
+# GCN-NEXT: %exec = COPY
+name: reduce_and_saveexec_liveout
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_saveexec
+# GCN:      %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc
+# GCN-NEXT: S_ENDPGM
+name: and_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = COPY %exec
+    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
+    %exec = S_MOV_B64_term %sgpr2_sgpr3
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_or_saveexec
+# GCN:      %exec = S_OR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_or_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_xor_saveexec
+# GCN:      %exec = S_XOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_xor_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_andn2_saveexec
+# GCN:      %exec = S_ANDN2_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_andn2_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_ANDN2_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_orn2_saveexec
+# GCN:      %exec = S_ORN2_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_orn2_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_ORN2_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_nand_saveexec
+# GCN:      %exec = S_NAND_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_nand_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_NAND_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_nor_saveexec
+# GCN:      %exec = S_NOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_nor_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_NOR_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_xnor_saveexec
+# GCN:      %exec = S_XNOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_xnor_saveexec
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_XNOR_B64 %exec, killed %vcc, implicit-def %scc
+    %exec = COPY killed %sgpr0_sgpr1
+    S_ENDPGM
+...
+---




More information about the llvm-commits mailing list