[llvm] 992fbce - [AMDGPU] copyPhysReg() for 16 bit SGPR subregs
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 17 11:59:50 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-04-17T11:59:39-07:00
New Revision: 992fbce4e9b034e752dcb4e1be0306b49bbc6b19
URL: https://github.com/llvm/llvm-project/commit/992fbce4e9b034e752dcb4e1be0306b49bbc6b19
DIFF: https://github.com/llvm/llvm-project/commit/992fbce4e9b034e752dcb4e1be0306b49bbc6b19.diff
LOG: [AMDGPU] copyPhysReg() for 16 bit SGPR subregs
Differential Revision: https://reviews.llvm.org/D78255
Added:
llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 92d0440a5806..60569dfbbe1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
- MCRegister SrcReg, bool KillSrc) {
+ MCRegister SrcReg, bool KillSrc,
+ const char *Msg = "illegal SGPR to VGPR copy") {
MachineFunction *MF = MBB.getParent();
- DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
- "illegal SGPR to VGPR copy",
- DL, DS_Error);
+ DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
LLVMContext &C = MF->getFunction().getContext();
C.diagnose(IllegalCopy);
@@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
+ if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
+ RC == &AMDGPU::SGPR_LO16RegClass) {
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
-
- bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
- bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
- DestReg = RI.getMatchingSuperReg(DestReg,
- DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
- &AMDGPU::VGPR_32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg,
- SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
- &AMDGPU::VGPR_32RegClass);
-
- auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
+ AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+
+ bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
+ bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+ bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
+ RC == &AMDGPU::SGPR_LO16RegClass);
+ bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
+ const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ MCRegister NewDestReg =
+ RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
+ DstRC);
+ MCRegister NewSrcReg =
+ RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
+ SrcRC);
+
+ if (IsSGPRDst) {
+ if (!IsSGPRSrc) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (IsSGPRSrc && !ST.hasSDWAScalar()) {
+ if (!DstLow || !SrcLow) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+ "Cannot use hi16 subreg on VI!");
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
.addImm(0) // src0_modifiers
- .addReg(SrcReg)
+ .addReg(NewSrcReg)
.addImm(0) // clamp
.addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
: AMDGPU::SDWA::SdwaSel::WORD_1)
.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
.addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
: AMDGPU::SDWA::SdwaSel::WORD_1)
- .addReg(DestReg, RegState::Implicit | RegState::Undef);
+ .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
// First implicit operand is $exec.
MIB->tieOperands(0, MIB->getNumOperands() - 1);
return;
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
new file mode 100644
index 000000000000..40bfd60e8ccf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir
@@ -0,0 +1,31 @@
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore, no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr:
+# GCN: ; illegal copy v0.l to s1.l
+# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy
+name: lo_to_lo_illegal_vgpr_to_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $sgpr1_lo16 = COPY $vgpr0_lo16
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GFX8: ; illegal copy s0.l to v1.h
+# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+# GFX8-ERR: error: <unknown>:0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI!
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $vgpr1_hi16 = COPY killed $sgpr0_lo16
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
index f5c507be361d..f5b7f110ea20 100644
--- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
@@ -193,3 +193,14 @@ body: |
$vgpr2 = COPY killed $vgpr1
S_ENDPGM 0
...
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
+# GCN: s_mov_b32 s1, s0
+name: lo_to_lo_sgpr_to_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $sgpr1_lo16 = COPY $sgpr0_lo16
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
new file mode 100644
index 000000000000..21fc79d499bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir
@@ -0,0 +1,26 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# Note: GFX8 did not allow SDWA SGPR sources. Therefore, no HI16 subregs can be used there.
+
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_lo_sgpr_to_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $vgpr1_lo16 = COPY $sgpr0_lo16
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
+# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+name: lo_to_hi_sgpr_to_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $vgpr1_hi16 = COPY killed $sgpr0_lo16
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list