[llvm-commits] [llvm] r101971 - in /llvm/trunk: include/llvm/CodeGen/LiveIntervalAnalysis.h lib/CodeGen/SimpleRegisterCoalescing.cpp lib/CodeGen/SimpleRegisterCoalescing.h test/CodeGen/Thumb2/cross-rc-coalescing-2.ll test/CodeGen/X86/2008-10-16-SpillerBug.ll test/CodeGen/X86/postra-licm.ll test/CodeGen/X86/stack-color-with-reg.ll
Evan Cheng
evan.cheng at apple.com
Tue Apr 20 17:44:22 PDT 2010
Author: evancheng
Date: Tue Apr 20 19:44:22 2010
New Revision: 101971
URL: http://llvm.org/viewvc/llvm-project?rev=101971&view=rev
Log:
- Clean up some crappy code which deals with coalescing of copies which look at
extract_subreg / insert_subreg, etc.
- Add support for more aggressive insert_subreg coalescing.
Modified:
llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll
llvm/trunk/test/CodeGen/X86/postra-licm.ll
llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll
Modified: llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h Tue Apr 20 19:44:22 2010
@@ -111,6 +111,12 @@
double getScaledIntervalSize(LiveInterval& I) {
return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
}
+
+ /// getFuncInstructionCount - Return the number of instructions in the
+ /// current function.
+ unsigned getFuncInstructionCount() {
+ return indexes_->getFunctionSize();
+ }
/// getApproximateInstructionCount - computes an estimate of the number
/// of instructions in a given LiveInterval.
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Tue Apr 20 19:44:22 2010
@@ -1168,20 +1168,44 @@
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
- unsigned SmallReg,
- unsigned Threshold) {
- // Then make sure the intervals are *short*.
- LiveInterval &LargeInt = li_->getInterval(LargeReg);
- LiveInterval &SmallInt = li_->getInterval(SmallReg);
- unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
- unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
- if (LargeSize > Threshold) {
- unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
- mri_->use_nodbg_end());
- unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
- mri_->use_nodbg_end());
- if (SmallUses*LargeSize < LargeUses*SmallSize)
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
+ // This heuristics is good enough in practice, but it's obviously not *right*.
+ // 4 is a magic number that works well enough for x86, ARM, etc. It filter
+ // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+ // Early exit if the function is fairly small, coalesce aggressively if
+ // that's the case. For really special register classes with 3 or
+ // fewer registers, be a bit more careful.
+ (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+ if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
+ return true;
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+ mri_->use_nodbg_end());
+ unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+ mri_->use_nodbg_end());
+ float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
+ NewRCCount;
+ if (SrcRC != NewRC && SrcSize > NewRCCount) {
+ unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
+ float Density = ((float)SrcUses / SrcSize) / SrcRCCount;
+ if (NewDensity > Density * 2.0f)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > NewRCCount) {
+ unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
+ float Density = ((float)DstUses / DstSize) / DstRCCount;
+ if (NewDensity > Density * 2.0f)
return false;
}
return true;
@@ -1517,10 +1541,11 @@
return false; // Not coalescable
}
- unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
- unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
- unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
- if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+ if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
+ << SrcRC->getName() << "/"
+ << DstRC->getName() << " -> "
+ << NewRC->getName() << ".\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1568,49 +1593,40 @@
}
}
- unsigned LargeReg = SrcReg;
- unsigned SmallReg = DstReg;
-
// Now determine the register class of the joined register.
- if (isExtSubReg) {
- if (SubIdx && DstRC && DstRC->isASubClass()) {
- // This is a move to a sub-register class. However, the source is a
- // sub-register of a larger register class. We don't know what should
- // the register class be. FIXME.
- Again = true;
- return false;
+ if (!SrcIsPhys && !DstIsPhys) {
+ if (isExtSubReg) {
+ NewRC =
+ SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
+ } else if (isInsSubReg) {
+ NewRC =
+ SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
+ } else {
+ NewRC = getCommonSubClass(SrcRC, DstRC);
}
- if (!DstIsPhys && !SrcIsPhys)
- NewRC = SrcRC;
- } else if (!SrcIsPhys && !DstIsPhys) {
- NewRC = getCommonSubClass(SrcRC, DstRC);
+
if (!NewRC) {
DEBUG(dbgs() << "\tDisjoint regclasses: "
<< SrcRC->getName() << ", "
<< DstRC->getName() << ".\n");
return false; // Not coalescable.
}
- if (DstRC->getSize() > SrcRC->getSize())
- std::swap(LargeReg, SmallReg);
- }
- // If we are joining two virtual registers and the resulting register
- // class is more restrictive (fewer register, smaller size). Check if it's
- // worth doing the merge.
- if (!SrcIsPhys && !DstIsPhys &&
- (isExtSubReg || DstRC->isASubClass()) &&
- !isWinToJoinCrossClass(LargeReg, SmallReg,
- allocatableRCRegs_[NewRC].count())) {
- DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- // Allow the coalescer to try again in case either side gets coalesced to
- // a physical register that's compatible with the other side. e.g.
- // r1024 = MOV32to32_ r1025
- // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
- Again = true; // May be possible to coalesce later.
- return false;
+ // If we are joining two virtual registers and the resulting register
+ // class is more restrictive (fewer register, smaller size). Check if it's
+ // worth doing the merge.
+ if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
+ << SrcRC->getName() << "/"
+ << DstRC->getName() << " -> "
+ << NewRC->getName() << ".\n");
+ // Allow the coalescer to try again in case either side gets coalesced to
+ // a physical register that's compatible with the other side. e.g.
+ // r1024 = MOV32to32_ r1025
+ // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
}
}
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Tue Apr 20 19:44:22 2010
@@ -179,8 +179,11 @@
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
- bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
- unsigned Threshold);
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
/// register with a physical register, check if any of the virtual register
Modified: llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll Tue Apr 20 19:44:22 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
entry:
Modified: llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll Tue Apr 20 19:44:22 2010
@@ -1,4 +1,5 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
@@ -61,11 +62,15 @@
define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
entry:
+; CHECK: t:
+; CHECK: xorl %ecx, %ecx
%0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
%1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
br label %bb1132
bb51: ; preds = %bb1132
+; CHECK: .align 4
+; CHECK: andl $7
%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
%3 = load i16* %2, align 1 ; <i16> [#uses=3]
%4 = lshr i16 %3, 6 ; <i16> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/postra-licm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/postra-licm.ll?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/postra-licm.ll (original)
+++ llvm/trunk/test/CodeGen/X86/postra-licm.ll Tue Apr 20 19:44:22 2010
@@ -149,7 +149,6 @@
bb.nph: ; preds = %entry
; X86-64: movq _map_4_to_16 at GOTPCREL(%rip)
-; X86-64: movq _map_4_to_16 at GOTPCREL(%rip)
; X86-64: .align 4
%tmp5 = zext i32 undef to i64 ; <i64> [#uses=1]
%tmp6 = add i64 %tmp5, 1 ; <i64> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll?rev=101971&r1=101970&r2=101971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll Tue Apr 20 19:44:22 2010
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep asm-printer %t | grep 156
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 4
+; RUN: grep asm-printer %t | grep 166
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
More information about the llvm-commits
mailing list