[PATCH] D78255: [AMDGPU] copyPhysReg() for 16 bit SGPR subregs
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 15 17:06:58 PDT 2020
rampitec created this revision.
rampitec added a reviewer: arsenm.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
rampitec added parent revisions: D78150: [AMDGPU] Use SDWA for 16 bit subreg copy, D78250: [AMDGPU] Define 16 bit SGPR subregs.
https://reviews.llvm.org/D78255
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
Index: llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
@@ -0,0 +1,26 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_lo_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $vgpr1_lo16 = COPY $sgpr0_lo16
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_hi_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $vgpr1_hi16 = COPY killed $sgpr0_lo16
+ S_ENDPGM 0
+...
Index: llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
+++ llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
@@ -193,3 +193,14 @@
$vgpr2 = COPY killed $vgpr1
S_ENDPGM 0
...
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
+# GCN: s_mov_b32 s1, s0
+name: lo_to_lo_sgpr_to_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $sgpr1_lo16 = COPY $sgpr0_lo16
+ S_ENDPGM 0
+...
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -681,16 +681,20 @@
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
+ AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+ bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
- bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
+ bool SrcLow = IsSGPRSrc || AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
+ const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
DestReg = RI.getMatchingSuperReg(DestReg,
DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
&AMDGPU::VGPR_32RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg,
SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
- &AMDGPU::VGPR_32RegClass);
+ SrcRC);
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
.addImm(0) // src0_modifiers
@@ -707,6 +711,18 @@
return;
}
+ if (RC == &AMDGPU::SGPR_LO16RegClass) {
+ assert(AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+
+ DestReg = RI.getMatchingSuperReg(DestReg, AMDGPU::lo16,
+ &AMDGPU::SGPR_32RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AMDGPU::lo16,
+ &AMDGPU::SGPR_32RegClass);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RC)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D78255.257894.patch
Type: text/x-patch
Size: 3839 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200416/bfdffdf2/attachment.bin>
More information about the llvm-commits
mailing list