[llvm] c85eda7 - [AMDGPU] fix copies between 32 and 16 bit
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon May 4 08:54:33 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-05-04T08:54:22-07:00
New Revision: c85eda74b8581ee068431b73937f1aeeed455698
URL: https://github.com/llvm/llvm-project/commit/c85eda74b8581ee068431b73937f1aeeed455698
DIFF: https://github.com/llvm/llvm-project/commit/c85eda74b8581ee068431b73937f1aeeed455698.diff
LOG: [AMDGPU] fix copies between 32 and 16 bit
This is a hack to fix illegal 32 to 16 bit copies.
The problem is that making 16 bit subregs legal creates
a huge number of failures, which would all have to be resolved
at once unless a temporary hack like this is used.
The next step is to change operands, instruction definitions
and patterns until this hack is not needed.
Differential Revision: https://reviews.llvm.org/D79119
Added:
llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 22e3c530c2d5..e0b23f2aafd3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -83,6 +83,12 @@ static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
+static cl::opt<bool> Fix16BitCopies(
+ "amdgpu-fix-16-bit-physreg-copies",
+ cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+ cl::init(true),
+ cl::ReallyHidden);
+
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
RI(ST), ST(ST) {
@@ -527,6 +533,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
+ // FIXME: This is hack to resolve copies between 16 bit and 32 bit
+ // registers until all patterns are fixed.
+ if (Fix16BitCopies &&
+ ((RI.getRegSizeInBits(*RC) == 16) ^
+ (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+ MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
+ MCRegister Super = RI.get32BitRegister(RegToFix);
+ assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
+ RegToFix = Super;
+
+ if (DestReg == SrcReg) {
+ // Insert empty bundle since ExpandPostRA expects an instruction here.
+ BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+ return;
+ }
+
+ RC = RI.getPhysRegClass(DestReg);
+ }
+
if (RC == &AMDGPU::VGPR_32RegClass) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg) ||
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
new file mode 100644
index 000000000000..074f5de9224e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: lo16_to_v32
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: lo16_to_v32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1_lo16 = COPY $vgpr0
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: v32_to_lo16
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: v32_to_lo16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = COPY $vgpr0_lo16
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: samereg
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: BUNDLE
+# GCN-NEXT: S_ENDPGM
+name: samereg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr0 = COPY $vgpr0_lo16
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list