[llvm] r287131 - AMDGPU/SI: Avoid creating unnecessary copies in the SIFixSGPRCopies pass
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 10:42:17 PST 2016
Author: tstellar
Date: Wed Nov 16 12:42:17 2016
New Revision: 287131
URL: http://llvm.org/viewvc/llvm-project?rev=287131&view=rev
Log:
AMDGPU/SI: Avoid creating unnecessary copies in the SIFixSGPRCopies pass
Summary:
1. Don't try to copy values to and from the same register class.
2. Replace copies of registers holding immediate values with v_mov/s_mov
instructions.
The main purpose of this change is to make MachineSink do a better job of
determining when it is beneficial to split a critical edge, since the pass
assumes that copies will become move instructions.
This prevents a regression in uniform-cfg.ll if we enable critical edge
splitting for AMDGPU.
Reviewers: arsenm
Subscribers: arsenm, kzhuravl, llvm-commits
Differential Revision: https://reviews.llvm.org/D23408
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Nov 16 12:42:17 2016
@@ -2546,6 +2546,39 @@ void SIInstrInfo::legalizeOperandsSMRD(M
}
}
+void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *DstRC,
+ MachineOperand &Op,
+ MachineRegisterInfo &MRI,
+ const DebugLoc &DL) const {
+
+ unsigned OpReg = Op.getReg();
+ unsigned OpSubReg = Op.getSubReg();
+
+ const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
+ RI.getRegClassForReg(MRI, OpReg), OpSubReg);
+
+ // Check if operand is already the correct register class.
+ if (DstRC == OpRC)
+ return;
+
+ unsigned DstReg = MRI.createVirtualRegister(DstRC);
+ MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+
+ Op.setReg(DstReg);
+ Op.setSubReg(0);
+
+ MachineInstr *Def = MRI.getVRegDef(OpReg);
+ if (!Def)
+ return;
+
+ // Try to eliminate the copy if it is copying an immediate value.
+ if (Def->isMoveImmediate())
+ FoldImmediate(*Copy, *Def, OpReg, &MRI);
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -2603,15 +2636,14 @@ void SIInstrInfo::legalizeOperands(Machi
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
- unsigned DstReg = MRI.createVirtualRegister(RC);
// MI is a PHI instruction.
MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
- BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
- Op.setReg(DstReg);
+ // Avoid creating no-op copies with the same src and dst reg class. These
+ // confuse some of the machine passes.
+ legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
}
}
@@ -2635,12 +2667,7 @@ void SIInstrInfo::legalizeOperands(Machi
if (VRC == OpRC)
continue;
- unsigned DstReg = MRI.createVirtualRegister(VRC);
-
- BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
-
- Op.setReg(DstReg);
+ legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Op.setIsKill();
}
}
@@ -2656,11 +2683,9 @@ void SIInstrInfo::legalizeOperands(Machi
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
if (DstRC != Src0RC) {
- MachineBasicBlock &MBB = *MI.getParent();
- unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
- .addReg(Src0);
- MI.getOperand(1).setReg(NewSrc0);
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineOperand &Op = MI.getOperand(1);
+ legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
}
return;
}
@@ -3000,6 +3025,22 @@ void SIInstrInfo::moveToVALU(MachineInst
continue;
unsigned DstReg = Inst.getOperand(0).getReg();
+ if (Inst.isCopy() &&
+ TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+ NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
+ // Instead of creating a copy where src and dst are the same register
+ // class, we just replace all uses of dst with src. These kinds of
+ // copies interfere with the heuristics MachineSink uses to decide
+ // whether or not to split a critical edge, since the pass assumes
+ // that copies will end up as machine instructions and not be
+ // eliminated.
+ addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
+ MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
+ MRI.clearKillFlags(Inst.getOperand(1).getReg());
+ Inst.getOperand(0).setReg(DstReg);
+ continue;
+ }
+
NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Nov 16 12:42:17 2016
@@ -567,6 +567,12 @@ public:
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
+ void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *DstRC,
+ MachineOperand &Op, MachineRegisterInfo &MRI,
+ const DebugLoc &DL) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr &MI) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Wed Nov 16 12:42:17 2016
@@ -1059,17 +1059,6 @@ SIRegisterInfo::findUnusedRegister(const
return AMDGPU::NoRegister;
}
-bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
- const TargetRegisterClass *RC;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- RC = MRI.getRegClass(Reg);
- else
- RC = getPhysRegClass(Reg);
-
- return hasVGPRs(RC);
-}
-
unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 800;
@@ -1363,3 +1352,17 @@ ArrayRef<int16_t> SIRegisterInfo::getReg
llvm_unreachable("unhandled register size");
}
}
+
+const TargetRegisterClass*
+SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI.getRegClass(Reg);
+
+ return getPhysRegClass(Reg);
+}
+
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ return hasVGPRs(getRegClassForReg(MRI, Reg));
+}
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Wed Nov 16 12:42:17 2016
@@ -172,6 +172,8 @@ public:
unsigned getSGPRPressureSet() const { return SGPRSetID; };
unsigned getVGPRPressureSet() const { return VGPRSetID; };
+ const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
+ unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
bool isSGPRPressureSet(unsigned SetID) const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll Wed Nov 16 12:42:17 2016
@@ -108,9 +108,9 @@ endif:
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:12 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 8-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
@@ -133,11 +133,11 @@ endif:
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:12 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Wed Nov 16 12:42:17 2016
@@ -207,7 +207,6 @@ define void @dynamic_insertelement_v3i16
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off
-; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
@@ -418,7 +417,7 @@ define void @dynamic_insertelement_v4f64
}
; GCN-LABEL: {{^}}dynamic_insertelement_v8f64:
-; GCN: SCRATCH_RSRC_DWORD
+; GCN-DAG: SCRATCH_RSRC_DWORD
; FIXME: Should be able to eliminate this?
@@ -426,8 +425,8 @@ define void @dynamic_insertelement_v4f64
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
-; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 0{{$}}
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN-DAG: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll Wed Nov 16 12:42:17 2016
@@ -48,7 +48,9 @@ done:
; GCN-LABEL: {{^}}legal_offset_fi_offset:
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
+; This constant isn't folded, because it has multiple uses.
+; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8000
+; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll?rev=287131&r1=287130&r2=287131&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll Wed Nov 16 12:42:17 2016
@@ -3,7 +3,7 @@
; register operands in the correct order when modifying the opcode of an
; instruction to V_ADD_I32_e32.
-; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 killed %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry: