[llvm] 992fbce - [AMDGPU] copyPhysReg() for 16 bit SGPR subregs

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 17 11:59:50 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-04-17T11:59:39-07:00
New Revision: 992fbce4e9b034e752dcb4e1be0306b49bbc6b19

URL: https://github.com/llvm/llvm-project/commit/992fbce4e9b034e752dcb4e1be0306b49bbc6b19
DIFF: https://github.com/llvm/llvm-project/commit/992fbce4e9b034e752dcb4e1be0306b49bbc6b19.diff

LOG: [AMDGPU] copyPhysReg() for 16 bit SGPR subregs

Differential Revision: https://reviews.llvm.org/D78255

Added: 
    llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
    llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 92d0440a5806..60569dfbbe1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
-                              MCRegister SrcReg, bool KillSrc) {
+                              MCRegister SrcReg, bool KillSrc,
+                              const char *Msg = "illegal SGPR to VGPR copy") {
   MachineFunction *MF = MBB.getParent();
-  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
-                                        "illegal SGPR to VGPR copy",
-                                        DL, DS_Error);
+  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
   LLVMContext &C = MF->getFunction().getContext();
   C.diagnose(IllegalCopy);
 
@@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
+  if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
+      RC == &AMDGPU::SGPR_LO16RegClass) {
     assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
-           AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
-
-    bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
-    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
-    DestReg = RI.getMatchingSuperReg(DestReg,
-                                     DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                                     &AMDGPU::VGPR_32RegClass);
-    SrcReg = RI.getMatchingSuperReg(SrcReg,
-                                    SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                                    &AMDGPU::VGPR_32RegClass);
-
-    auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
+           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+           AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+
+    bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
+    bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+    bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
+                   RC == &AMDGPU::SGPR_LO16RegClass);
+    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
+                  AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+    const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
+                                                 : &AMDGPU::VGPR_32RegClass;
+    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+                                                 : &AMDGPU::VGPR_32RegClass;
+    MCRegister NewDestReg =
+      RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
+                             DstRC);
+    MCRegister NewSrcReg =
+      RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
+                             SrcRC);
+
+    if (IsSGPRDst) {
+      if (!IsSGPRSrc) {
+        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+        return;
+      }
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
+        .addReg(NewSrcReg, getKillRegState(KillSrc));
+      return;
+    }
+
+    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
+      if (!DstLow || !SrcLow) {
+        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+                          "Cannot use hi16 subreg on VI!");
+      }
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
+        .addReg(NewSrcReg, getKillRegState(KillSrc));
+      return;
+    }
+
+    auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
       .addImm(0) // src0_modifiers
-      .addReg(SrcReg)
+      .addReg(NewSrcReg)
       .addImm(0) // clamp
       .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
                      : AMDGPU::SDWA::SdwaSel::WORD_1)
       .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
       .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
                      : AMDGPU::SDWA::SdwaSel::WORD_1)
-      .addReg(DestReg, RegState::Implicit | RegState::Undef);
+      .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
     // First implicit operand is $exec.
     MIB->tieOperands(0, MIB->getNumOperands() - 1);
     return;

diff  --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
new file mode 100644
index 000000000000..40bfd60e8ccf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
@@ -0,0 +1,31 @@
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore, no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr:
+# GCN: ; illegal copy v0.l to s1.l
+# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy
+name: lo_to_lo_illegal_vgpr_to_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr1_lo16 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GFX8: ; illegal copy s0.l to v1.h
+# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+# GFX8-ERR: error: <unknown>:0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI!
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_hi16 = COPY killed $sgpr0_lo16
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
index f5c507be361d..f5b7f110ea20 100644
--- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
@@ -193,3 +193,14 @@ body:             |
     $vgpr2 = COPY killed $vgpr1
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
+# GCN: s_mov_b32 s1, s0
+name: lo_to_lo_sgpr_to_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $sgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
new file mode 100644
index 000000000000..21fc79d499bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
@@ -0,0 +1,26 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore, no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_lo_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $sgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1_hi16 = COPY killed $sgpr0_lo16
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list