[llvm-commits] [llvm] r127351 - in /llvm/trunk: lib/CodeGen/SimpleRegisterCoalescing.cpp test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll test/CodeGen/X86/fold-pcmpeqd-2.ll

Wed Mar 9 11:27:06 PST 2011

Author: stoklund
Date: Wed Mar  9 13:27:06 2011
New Revision: 127351

URL: http://llvm.org/viewvc/llvm-project?rev=127351&view=rev
Log:
Make physreg coalescing independent on the number of uses of the virtual register.

The damage done by physreg coalescing only depends on the number of instructions
the extended physreg live range covers. This fixes PR9438.

The heuristic is still luck-based, and physreg coalescing really should be
disabled completely. We need a register allocator with better hinting support
before that is possible.

Convert a test to FileCheck and force spilling by inserting an extra call. The
previous spilling behavior was dependent on misguided physreg coalescing
decisions.

Added:
    llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
Modified:
    llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
    llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll

Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=127351&r1=127350&r2=127351&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Wed Mar  9 13:27:06 2011
@@ -1038,9 +1038,7 @@
     const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
     unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
     unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
-    if (Length > Threshold &&
-        std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
-                      mri_->use_nodbg_end()) * Threshold < Length) {
+    if (Length > Threshold) {
       // Before giving up coalescing, if definition of source is defined by
       // trivial computation, try rematerializing it.
       if (!CP.isFlipped() &&

Added: llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll?rev=127351&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll Wed Mar  9 13:27:06 2011
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=yonah < %s
+; PR9438
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-unknown-freebsd9.0"
+
+; The 'call fastcc' ties down %ebx, %ecx, and %edx.
+; A MUL8r ties down %al, leaving no GR32_ABCD registers available.
+; The coalescer can easily overallocate physical registers,
+; and register allocation fails.
+
+declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind
+
+define i32 @cvtchar(i8* nocapture %sp) nounwind {
+  %temp.i = alloca [2 x i8], align 1
+  %tmp1 = load i8* %sp, align 1
+  %div = udiv i8 %tmp1, 10
+  %rem = urem i8 %div, 10
+  %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0
+  store i8 %rem, i8* %arrayidx.i, align 1
+  %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind
+  ret i32 undef
+}

Modified: llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll?rev=127351&r1=127350&r2=127351&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll Wed Mar  9 13:27:06 2011
@@ -1,10 +1,20 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
-; This testcase should need to spill the -1 value on x86-32,
+; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
 ; should use a constant-pool load instead.
 
+; Constant pool all-ones vector:
+; CHECK: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+
+; No pcmpeqd instructions, everybody uses the constant pool.
+; CHECK: program_1:
+; CHECK-NOT: pcmpeqd
+
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
 	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
 	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
@@ -57,6 +67,7 @@
 	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
 	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
 	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]