[PATCH] D78255: [AMDGPU] copyPhysReg() for 16 bit SGPR subregs

Wed Apr 15 17:06:58 PDT 2020

rampitec created this revision.
rampitec added a reviewer: arsenm.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
rampitec added parent revisions: D78150: [AMDGPU] Use SDWA for 16 bit subreg copy, D78250: [AMDGPU] Define 16 bit SGPR subregs.

https://reviews.llvm.org/D78255

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
  llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir


Index: llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
@@ -0,0 +1,26 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_lo_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_hi_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_hi16 = COPY killed $sgpr0_lo16
+    S_ENDPGM 0
+...
Index: llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
+++ llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
@@ -193,3 +193,14 @@
     $vgpr2 = COPY killed $vgpr1
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
+# GCN: s_mov_b32 s1, s0
+name: lo_to_lo_sgpr_to_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $sgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -681,16 +681,20 @@
 
   if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
     assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
-           AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
+           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+           AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
 
+    bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
     bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
-    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
+    bool SrcLow = IsSGPRSrc || AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
+    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+                                                 : &AMDGPU::VGPR_32RegClass;
     DestReg = RI.getMatchingSuperReg(DestReg,
                                      DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
                                      &AMDGPU::VGPR_32RegClass);
     SrcReg = RI.getMatchingSuperReg(SrcReg,
                                     SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                                    &AMDGPU::VGPR_32RegClass);
+                                    SrcRC);
 
     auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
       .addImm(0) // src0_modifiers
@@ -707,6 +711,18 @@
     return;
   }
 
+  if (RC == &AMDGPU::SGPR_LO16RegClass) {
+    assert(AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+
+    DestReg = RI.getMatchingSuperReg(DestReg, AMDGPU::lo16,
+                                     &AMDGPU::SGPR_32RegClass);
+    SrcReg = RI.getMatchingSuperReg(SrcReg, AMDGPU::lo16,
+                                    &AMDGPU::SGPR_32RegClass);
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
   unsigned EltSize = 4;
   unsigned Opcode = AMDGPU::V_MOV_B32_e32;
   if (RI.isSGPRClass(RC)) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D78255.257894.patch
Type: text/x-patch
Size: 3839 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200416/bfdffdf2/attachment.bin>