[llvm] b726d07 - [AMDGPU] Reject moving PHI to VALU if the only VGPR input originated from move immediate
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 28 09:26:08 PDT 2020
Author: alex-t
Date: 2020-05-28T19:25:51+03:00
New Revision: b726d071b4aa46004228fc38ee5bfd167f999bfe
URL: https://github.com/llvm/llvm-project/commit/b726d071b4aa46004228fc38ee5bfd167f999bfe
DIFF: https://github.com/llvm/llvm-project/commit/b726d071b4aa46004228fc38ee5bfd167f999bfe.diff
LOG: [AMDGPU] Reject moving PHI to VALU if the only VGPR input originated from move immediate
Summary:
A PHI's result register class is set to VGPR or SGPR depending on the divergence of the value across blocks.
In some cases a uniform PHI needs to be converted to return a VGPR to avoid an odd number of moves
from VGPR to SGPR and back. A PHI should certainly return a VGPR if it has at least one VGPR input.
This change adds an exception: we do not want to convert a uniform PHI to VGPR when its only VGPR input
is a VGPR-to-SGPR COPY whose source VGPR is defined by a move-immediate, as in the example below.
bb.0:
  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

bb.1:
  %2:sreg_32 = .....

bb.2:
  %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
  S_BRANCH %bb.3

bb.3:
  %1:sreg_32 = COPY %0
  S_BRANCH %bb.2
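With the fold applied, the COPY itself is rewritten into a scalar move and the PHI keeps its SGPR
class, so no cross-bank moves remain. Roughly, as a sketch of the expected rewrite (matching the
checks in the new test below):

bb.3:
  %1:sreg_32 = S_MOV_B32 0
  S_BRANCH %bb.2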
Reviewers: rampitec
Reviewed By: rampitec
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80434
Added:
llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
Modified:
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 7283c33fe985..ef64c5674bd1 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -835,8 +835,22 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
else if (Def->isCopy() &&
TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
- hasVGPRInput = true;
- break;
+ Register SrcReg = Def->getOperand(1).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
+ unsigned SMovOp;
+ int64_t Imm;
+ if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
+ hasVGPRInput = true;
+ break;
+ } else {
+ // Formally, if we did not fold the immediate right away,
+ // it would be folded on the next iteration of the
+ // runOnMachineFunction main loop anyway; do it now since we can.
+ MachineFunction *MF = MI.getParent()->getParent();
+ Def->getOperand(1).ChangeToImmediate(Imm);
+ Def->addImplicitDefUseOperands(*MF);
+ Def->setDesc(TII->get(SMovOp));
+ }
}
}
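For reference, the new branch leans on the isSafeToFoldImmIntoCopy helper already present in
SIFixSGPRCopies.cpp. Below is a minimal illustrative sketch of the shape such a check takes;
the signature is reconstructed from the call site above rather than copied from the tree, so
details of the committed body may differ:

// Sketch only: decide whether a VGPR-to-SGPR COPY whose source is defined
// by MoveImm can instead become a scalar move-immediate. On success, SMovOp
// and Imm receive the scalar opcode and the immediate value to use.
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp, int64_t &Imm) {
  // The defining instruction must be a plain move-immediate.
  if (!MoveImm || !MoveImm->isMoveImmediate())
    return false;
  // Sub-register copies cannot be rewritten wholesale; this is the case the
  // phi_moveimm_subreg_input test below keeps on the VALU path.
  if (Copy->getOperand(0).getSubReg())
    return false;
  const MachineOperand *ImmOp =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  if (!ImmOp || !ImmOp->isImm())
    return false;
  // Map the vector move to its scalar counterpart; an unexpected opcode
  // (e.g. the SDWA mov in phi_moveimm_bad_opcode_input below) is rejected.
  switch (MoveImm->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
    SMovOp = AMDGPU::S_MOV_B32;
    break;
  default:
    return false;
  }
  Imm = ImmOp->getImm();
  return true;
}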
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
new file mode 100644
index 000000000000..ff061dec039a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -0,0 +1,97 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s | FileCheck -check-prefix=GCN %s
+---
+# GCN-LABEL: phi_moveimm_input
+# GCN-NOT: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+
+name: phi_moveimm_input
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr0, $sgpr1
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+ %4:sreg_32 = COPY $sgpr0
+ %5:sreg_32 = COPY $sgpr1
+
+ bb.1:
+ successors: %bb.2
+ %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.2
+ %1:sreg_32 = COPY %0
+ S_BRANCH %bb.2
+...
+
+---
+# GCN-LABEL: phi_moveimm_subreg_input
+# GCN-NOT: %{{[0-9]+}}:sreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_subreg_input
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr0, $sgpr1
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+ %4:sreg_32 = COPY $sgpr0
+ %5:sreg_32 = COPY $sgpr1
+
+ bb.1:
+ successors: %bb.2
+ undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.2
+ undef %1.sub0:sreg_64 = COPY %0
+ S_BRANCH %bb.2
+...
+
+
+---
+# GCN-LABEL: phi_moveimm_bad_opcode_input
+# GCN-NOT: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_bad_opcode_input
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr0, $sgpr1, $vgpr0
+ %6:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = V_MOV_B32_sdwa 0, %6:vgpr_32, 0, 5, 2, 4, implicit $exec, implicit %6:vgpr_32(tied-def 0)
+
+ %4:sreg_32 = COPY $sgpr0
+ %5:sreg_32 = COPY $sgpr1
+
+ bb.1:
+
+ successors: %bb.2
+ %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+ S_BRANCH %bb.2
+ bb.2:
+ successors: %bb.3
+ %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+ S_BRANCH %bb.3
+ bb.3:
+ successors: %bb.2
+ %1:sreg_32 = COPY %0
+ S_BRANCH %bb.2
+...