[llvm] c85eda7 - [AMDGPU] fix copies between 32 and 16 bit

Mon May 4 08:54:33 PDT 2020

Author: Stanislav Mekhanoshin
Date: 2020-05-04T08:54:22-07:00
New Revision: c85eda74b8581ee068431b73937f1aeeed455698

URL: https://github.com/llvm/llvm-project/commit/c85eda74b8581ee068431b73937f1aeeed455698
DIFF: https://github.com/llvm/llvm-project/commit/c85eda74b8581ee068431b73937f1aeeed455698.diff

LOG: [AMDGPU] fix copies between 32 and 16 bit

This is a hack to fix illegal 32 to 16 bit copies.
The problem is that making 16 bit subregs legal creates a huge
number of failures, which could only be resolved all at once
without a temporary hack like this.

The next step is to change operands, instruction definitions
and patterns until this hack is not needed.

Differential Revision: https://reviews.llvm.org/D79119

Added: 
    llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 22e3c530c2d5..e0b23f2aafd3 100644

--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -83,6 +83,12 @@ static cl::opt<unsigned>
 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));
 
+static cl::opt<bool> Fix16BitCopies(
+  "amdgpu-fix-16-bit-physreg-copies",
+  cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+  cl::init(true),
+  cl::ReallyHidden);
+
 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
     RI(ST), ST(ST) {
@@ -527,6 +533,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MCRegister SrcReg, bool KillSrc) const {
   const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
 
+  // FIXME: This is hack to resolve copies between 16 bit and 32 bit
+  // registers until all patterns are fixed.
+  if (Fix16BitCopies &&
+      ((RI.getRegSizeInBits(*RC) == 16) ^
+       (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
+    MCRegister Super = RI.get32BitRegister(RegToFix);
+    assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
+    RegToFix = Super;
+
+    if (DestReg == SrcReg) {
+      // Insert empty bundle since ExpandPostRA expects an instruction here.
+      BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+      return;
+    }
+
+    RC = RI.getPhysRegClass(DestReg);
+  }
+
   if (RC == &AMDGPU::VGPR_32RegClass) {
     assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
            AMDGPU::SReg_32RegClass.contains(SrcReg) ||

diff  --git a/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
new file mode 100644
index 000000000000..074f5de9224e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: lo16_to_v32
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: lo16_to_v32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $vgpr0
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: v32_to_lo16
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: v32_to_lo16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: samereg
+# GCN:      $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: BUNDLE
+# GCN-NEXT: S_ENDPGM
+name: samereg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr0 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...