[llvm-commits] [llvm] r137133 - in /llvm/trunk: lib/CodeGen/RegisterCoalescer.cpp test/CodeGen/ARM/fabss.ll test/CodeGen/ARM/fp_convert.ll
Jakob Stoklund Olesen
stoklund at 2pi.dk
Tue Aug 9 11:19:41 PDT 2011
Author: stoklund
Date: Tue Aug 9 13:19:41 2011
New Revision: 137133
URL: http://llvm.org/viewvc/llvm-project?rev=137133&view=rev
Log:
Inflate register classes after coalescing.
Coalescing can remove copy-like instructions with sub-register operands
that constrained the register class. Examples are:
x86: GR32_ABCD:sub_8bit_hi -> GR32
arm: DPR_VFP2:ssub0 -> DPR
Recompute the register class of any virtual registers that are used by
less instructions after coalescing.
This affects code generation for the Cortex-A8 where we use NEON
instructions for f32 operations, c.f. fp_convert.ll:
vadd.f32 d16, d1, d0
vcvt.s32.f32 d0, d16
The register allocator is now free to use d16 for the temporary, and
that comes first in the allocation order because it doesn't interfere
with any s-registers.
Modified:
llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
llvm/trunk/test/CodeGen/ARM/fabss.ll
llvm/trunk/test/CodeGen/ARM/fp_convert.ll
Modified: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp?rev=137133&r1=137132&r2=137133&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp Tue Aug 9 13:19:41 2011
@@ -55,6 +55,7 @@
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
static cl::opt<bool>
EnableJoining("join-liveintervals",
@@ -1852,7 +1853,7 @@
// Perform a final pass over the instructions and compute spill weights
// and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs;
+ SmallVector<unsigned, 4> DeadDefs, InflateRegs;
for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi;
@@ -1864,6 +1865,16 @@
bool DoDelete = true;
assert(MI->isCopyLike() && "Unrecognized copy instruction");
unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Collect candidates for register class inflation.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+ InflateRegs.push_back(SrcReg);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+ InflateRegs.push_back(DstReg);
+
if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
MI->getNumOperands() > 2)
// Do not delete extract_subreg, insert_subreg of physical
@@ -1905,8 +1916,12 @@
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
DeadDefs.push_back(Reg);
+ // Remat may also enable register class inflation.
+ if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+ InflateRegs.push_back(Reg);
+ }
if (MO.isDead())
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
@@ -1954,6 +1969,24 @@
}
}
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
DEBUG(dump());
DEBUG(LDV->dump());
if (VerifyCoalescing)
Modified: llvm/trunk/test/CodeGen/ARM/fabss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fabss.ll?rev=137133&r1=137132&r2=137133&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fabss.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fabss.ll Tue Aug 9 13:19:41 2011
@@ -22,6 +22,8 @@
; NFP0: vabs.f32 s1, s1
; CORTEXA8: test:
-; CORTEXA8: vabs.f32 d1, d1
+; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
+; CORTEXA8: vabs.f32 {{d[0-9]+}}, [[D1]]
+
; CORTEXA9: test:
; CORTEXA9: vabs.f32 s{{.}}, s{{.}}
Modified: llvm/trunk/test/CodeGen/ARM/fp_convert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp_convert.ll?rev=137133&r1=137132&r2=137133&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fp_convert.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fp_convert.ll Tue Aug 9 13:19:41 2011
@@ -7,7 +7,8 @@
; VFP2: test1:
; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
; NEON: test1:
-; NEON: vcvt.s32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.s32.f32 d0, [[D0]]
entry:
%0 = fadd float %a, %b
%1 = fptosi float %0 to i32
@@ -18,7 +19,8 @@
; VFP2: test2:
; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
; NEON: test2:
-; NEON: vcvt.u32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.u32.f32 d0, [[D0]]
entry:
%0 = fadd float %a, %b
%1 = fptoui float %0 to i32
More information about the llvm-commits
mailing list