[llvm-branch-commits] [llvm-branch] r127384 - in /llvm/branches/release_29: ./ lib/CodeGen/SimpleRegisterCoalescing.cpp test/CodeGen/X86/2009-03-11-CoalescerBug.ll test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll test/CodeGen/X86/fold-pcmpeqd-2.ll

Wed Mar 9 17:09:21 PST 2011

Author: void
Date: Wed Mar  9 19:09:20 2011
New Revision: 127384

URL: http://llvm.org/viewvc/llvm-project?rev=127384&view=rev
Log:
For PR9438:

--- Merging r127350 into '.':
D    test/CodeGen/X86/2009-03-11-CoalescerBug.ll

--- Merging r127351 into '.':
A    test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
U    test/CodeGen/X86/fold-pcmpeqd-2.ll
U    lib/CodeGen/SimpleRegisterCoalescing.cpp


Added:
    llvm/branches/release_29/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
      - copied unchanged from r127351, llvm/trunk/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
Removed:
    llvm/branches/release_29/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
Modified:
    llvm/branches/release_29/   (props changed)
    llvm/branches/release_29/lib/CodeGen/SimpleRegisterCoalescing.cpp
    llvm/branches/release_29/test/CodeGen/X86/fold-pcmpeqd-2.ll

Propchange: llvm/branches/release_29/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Mar  9 19:09:20 2011
@@ -1,2 +1,2 @@
 /llvm/branches/Apple/Pertwee:110850,110961
-/llvm/trunk:127264
+/llvm/trunk:127264,127350-127351

Modified: llvm/branches/release_29/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_29/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=127384&r1=127383&r2=127384&view=diff
==============================================================================
--- llvm/branches/release_29/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/branches/release_29/lib/CodeGen/SimpleRegisterCoalescing.cpp Wed Mar  9 19:09:20 2011
@@ -1038,9 +1038,7 @@
     const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
     unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
     unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
-    if (Length > Threshold &&
-        std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
-                      mri_->use_nodbg_end()) * Threshold < Length) {
+    if (Length > Threshold) {
       // Before giving up coalescing, if definition of source is defined by
       // trivial computation, try rematerializing it.
       if (!CP.isFlipped() &&

Removed: llvm/branches/release_29/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_29/test/CodeGen/X86/2009-03-11-CoalescerBug.ll?rev=127383&view=auto
==============================================================================
--- llvm/branches/release_29/test/CodeGen/X86/2009-03-11-CoalescerBug.ll (original)
+++ llvm/branches/release_29/test/CodeGen/X86/2009-03-11-CoalescerBug.ll (removed)
@@ -1,85 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
-
-@lookupTable5B = external global [64 x i32], align 32		; <[64 x i32]*> [#uses=1]
-@lookupTable3B = external global [16 x i32], align 32		; <[16 x i32]*> [#uses=1]
-@disparity0 = external global i32		; <i32*> [#uses=5]
-@disparity1 = external global i32		; <i32*> [#uses=3]
-
-define i32 @calc(i32 %theWord, i32 %k) nounwind {
-entry:
-	%0 = lshr i32 %theWord, 3		; <i32> [#uses=1]
-	%1 = and i32 %0, 31		; <i32> [#uses=1]
-	%2 = shl i32 %k, 5		; <i32> [#uses=1]
-	%3 = or i32 %1, %2		; <i32> [#uses=1]
-	%4 = and i32 %theWord, 7		; <i32> [#uses=1]
-	%5 = shl i32 %k, 3		; <i32> [#uses=1]
-	%6 = or i32 %5, %4		; <i32> [#uses=1]
-	%7 = getelementptr [64 x i32]* @lookupTable5B, i32 0, i32 %3		; <i32*> [#uses=1]
-	%8 = load i32* %7, align 4		; <i32> [#uses=5]
-	%9 = getelementptr [16 x i32]* @lookupTable3B, i32 0, i32 %6		; <i32*> [#uses=1]
-	%10 = load i32* %9, align 4		; <i32> [#uses=5]
-	%11 = and i32 %8, 65536		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
-	br i1 %12, label %bb1, label %bb
-
-bb:		; preds = %entry
-	%13 = and i32 %8, 994		; <i32> [#uses=1]
-	%14 = load i32* @disparity0, align 4		; <i32> [#uses=2]
-	store i32 %14, i32* @disparity1, align 4
-	br label %bb8
-
-bb1:		; preds = %entry
-	%15 = lshr i32 %8, 18		; <i32> [#uses=1]
-	%16 = and i32 %15, 1		; <i32> [#uses=1]
-	%17 = load i32* @disparity0, align 4		; <i32> [#uses=4]
-	%18 = icmp eq i32 %16, %17		; <i1> [#uses=1]
-	%not = select i1 %18, i32 0, i32 994		; <i32> [#uses=1]
-	%.masked = and i32 %8, 994		; <i32> [#uses=1]
-	%result.1 = xor i32 %not, %.masked		; <i32> [#uses=2]
-	%19 = and i32 %8, 524288		; <i32> [#uses=1]
-	%20 = icmp eq i32 %19, 0		; <i1> [#uses=1]
-	br i1 %20, label %bb7, label %bb6
-
-bb6:		; preds = %bb1
-	%21 = xor i32 %17, 1		; <i32> [#uses=2]
-	store i32 %21, i32* @disparity1, align 4
-	br label %bb8
-
-bb7:		; preds = %bb1
-	store i32 %17, i32* @disparity1, align 4
-	br label %bb8
-
-bb8:		; preds = %bb7, %bb6, %bb
-	%22 = phi i32 [ %17, %bb7 ], [ %21, %bb6 ], [ %14, %bb ]		; <i32> [#uses=4]
-	%result.0 = phi i32 [ %result.1, %bb7 ], [ %result.1, %bb6 ], [ %13, %bb ]		; <i32> [#uses=2]
-	%23 = and i32 %10, 65536		; <i32> [#uses=1]
-	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
-	br i1 %24, label %bb10, label %bb9
-
-bb9:		; preds = %bb8
-	%25 = and i32 %10, 29		; <i32> [#uses=1]
-	%26 = or i32 %result.0, %25		; <i32> [#uses=1]
-	store i32 %22, i32* @disparity0, align 4
-	ret i32 %26
-
-bb10:		; preds = %bb8
-	%27 = lshr i32 %10, 18		; <i32> [#uses=1]
-	%28 = and i32 %27, 1		; <i32> [#uses=1]
-	%29 = icmp eq i32 %28, %22		; <i1> [#uses=1]
-	%not13 = select i1 %29, i32 0, i32 29		; <i32> [#uses=1]
-	%.masked20 = and i32 %10, 29		; <i32> [#uses=1]
-	%.pn = xor i32 %not13, %.masked20		; <i32> [#uses=1]
-	%result.3 = or i32 %.pn, %result.0		; <i32> [#uses=2]
-	%30 = and i32 %10, 524288		; <i32> [#uses=1]
-	%31 = icmp eq i32 %30, 0		; <i1> [#uses=1]
-	br i1 %31, label %bb17, label %bb16
-
-bb16:		; preds = %bb10
-	%32 = xor i32 %22, 1		; <i32> [#uses=1]
-	store i32 %32, i32* @disparity0, align 4
-	ret i32 %result.3
-
-bb17:		; preds = %bb10
-	store i32 %22, i32* @disparity0, align 4
-	ret i32 %result.3
-}

Modified: llvm/branches/release_29/test/CodeGen/X86/fold-pcmpeqd-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_29/test/CodeGen/X86/fold-pcmpeqd-2.ll?rev=127384&r1=127383&r2=127384&view=diff
==============================================================================
--- llvm/branches/release_29/test/CodeGen/X86/fold-pcmpeqd-2.ll (original)
+++ llvm/branches/release_29/test/CodeGen/X86/fold-pcmpeqd-2.ll Wed Mar  9 19:09:20 2011
@@ -1,10 +1,20 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
-; This testcase should need to spill the -1 value on x86-32,
+; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
 ; should use a constant-pool load instead.
 
+; Constant pool all-ones vector:
+; CHECK: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+
+; No pcmpeqd instructions, everybody uses the constant pool.
+; CHECK: program_1:
+; CHECK-NOT: pcmpeqd
+
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
 	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
 	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
@@ -57,6 +67,7 @@
 	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
 	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
 	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]