[llvm-commits] [llvm] r144037 - in /llvm/trunk: lib/CodeGen/ExecutionDepsFix.cpp test/CodeGen/X86/avx-intrinsics-x86.ll test/CodeGen/X86/avx-logic.ll test/CodeGen/X86/nontemporal.ll test/CodeGen/X86/sse-align-3.ll test/CodeGen/X86/sse2-blend.ll test/CodeGen/X86/sse2.ll test/CodeGen/X86/vec_shuffle.ll

Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon Nov 7 15:08:22 PST 2011


Author: stoklund
Date: Mon Nov  7 17:08:21 2011
New Revision: 144037

URL: http://llvm.org/viewvc/llvm-project?rev=144037&view=rev
Log:
Kill and collapse outstanding DomainValues.

DomainValues that are only used by "don't care" instructions are now
collapsed to the first possible execution domain after all basic blocks
have been processed.  This typically means the PS domain on x86.

For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are
completely collapsed to the PS domain instead of containing a mix of
execution domains created by isel.
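
A minimal sketch of what "collapse to the first possible execution domain"
means, using a hypothetical DomainValueSketch struct rather than the pass's
real DomainValue class (names and layout here are made up for illustration):

#include <cassert>

// Hypothetical stand-in for the pass's domain bookkeeping.  A value that
// several domains could legally execute in carries a bit mask of those
// domains; collapsing commits it to exactly one of them.
struct DomainValueSketch {
  unsigned AvailableDomains = 0; // Bit mask of legal execution domains.
  int Domain = -1;               // Chosen domain once collapsed, -1 before.

  bool isCollapsed() const { return Domain >= 0; }

  // Collapse to the first (lowest-numbered) legal domain.  On x86 the
  // lowest domain is PS, which is why "don't care" values end up as PS
  // instructions.
  void collapseToFirst() {
    assert(AvailableDomains && "Nothing to collapse to");
    Domain = __builtin_ctz(AvailableDomains); // Lowest set bit (GCC/Clang builtin).
  }
};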

Modified:
    llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/avx-logic.ll
    llvm/trunk/test/CodeGen/X86/nontemporal.ll
    llvm/trunk/test/CodeGen/X86/sse-align-3.ll
    llvm/trunk/test/CodeGen/X86/sse2-blend.ll
    llvm/trunk/test/CodeGen/X86/sse2.ll
    llvm/trunk/test/CodeGen/X86/vec_shuffle.ll

Modified: llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp Mon Nov  7 17:08:21 2011
@@ -510,11 +510,20 @@
     leaveBasicBlock(MBB);
   }
 
-  // Clear the LiveOuts vectors. Should we also collapse any remaining
-  // DomainValues?
-  for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
-         i != e; ++i)
-    delete[] i->second;
+  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+    LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+    if (FI == LiveOuts.end())
+      continue;
+    assert(FI->second && "Null entry");
+    // The DomainValue is collapsed when the last reference is killed.
+    LiveRegs = FI->second;
+    for (unsigned i = 0, e = NumRegs; i != e; ++i)
+      if (LiveRegs[i])
+        Kill(i);
+    delete[] LiveRegs;
+  }
   LiveOuts.clear();
   Avail.clear();
   Allocator.DestroyAll();
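
As a rough, hypothetical model of why killing the remaining live-out
references collapses the outstanding DomainValues (this is not the pass's
actual code; the real logic is the Kill()/reference machinery in
ExecutionDepsFix.cpp): the live-out tables hold counted references, and
whoever drops the last reference to a value that was never forced into a
domain collapses it on the spot.

#include <cassert>

// Toy reference-counting model, simplified and with made-up names.
struct OpenValueSketch {
  unsigned Refs = 1;             // References held by live-reg/live-out tables.
  unsigned AvailableDomains = 0; // Domains the value may still use.
  bool Collapsed = false;

  void release() {
    assert(Refs && "Releasing a dead value");
    if (--Refs == 0 && !Collapsed) {
      Collapsed = true;
      // Keep only the lowest set bit, i.e. the first legal domain (PS on x86).
      AvailableDomains &= ~(AvailableDomains - 1);
    }
  }
};

Walking the blocks in reverse post-order in the loop above simply mirrors
the order they were originally processed in; the important part is that
every remaining live-out reference gets released, so no DomainValue is left
half-open when the pass finishes.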

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Mon Nov  7 17:08:21 2011
@@ -315,24 +315,31 @@
 
 
 define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: test_x86_sse2_movnt_dq
   ; CHECK: movl
   ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
 
 
 define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_movnt_pd
   ; CHECK: movl
   ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+  ; fadd operation forces the execution domain.
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
   ; CHECK: vmulsd
   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -749,6 +756,7 @@
 
 
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
   ; CHECK: movl
   ; CHECK: vmovq
   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +766,7 @@
 
 
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +776,18 @@
 
 
 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
   ; CHECK: vsubsd
   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res

Modified: llvm/trunk/test/CodeGen/X86/avx-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-logic.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-logic.ll Mon Nov  7 17:08:21 2011
@@ -165,7 +165,9 @@
 ; CHECK: vpandn  %xmm
 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
   %x = and <2 x i64> %a, %y
   ret <2 x i64> %x
 }
@@ -173,7 +175,9 @@
 ; CHECK: vpand %xmm
 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
   ret <2 x i64> %x
 }
 

Modified: llvm/trunk/test/CodeGen/X86/nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal.ll Mon Nov  7 17:08:21 2011
@@ -3,13 +3,16 @@
 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
 ; CHECK: movntps
   %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
 ; CHECK: movntdq
   %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
 ; CHECK: movntpd
   %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
 ; CHECK: movnti
   %cast3 = bitcast i8* %B to i32*
   store i32 %D, i32* %cast3, align 16, !nontemporal !0

Modified: llvm/trunk/test/CodeGen/X86/sse-align-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-align-3.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-align-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-align-3.ll Mon Nov  7 17:08:21 2011
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     movapd
 ; CHECK:     movaps
-; CHECK-NOT:     movaps
-; CHECK:     movapd
+; CHECK-NOT:     movapd
+; CHECK:     movaps
 ; CHECK-NOT:     movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {

Modified: llvm/trunk/test/CodeGen/X86/sse2-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-blend.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-blend.ll Mon Nov  7 17:08:21 2011
@@ -26,11 +26,10 @@
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
@@ -43,16 +42,14 @@
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
 
-
 define void @vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
   %A = load <4 x double>* %v1
   %B = load <4 x double>* %v2

Modified: llvm/trunk/test/CodeGen/X86/sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2.ll Mon Nov  7 17:08:21 2011
@@ -144,7 +144,7 @@
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
 ; CHECK: test11:
-; CHECK: movapd	4(%esp), %xmm0
+; CHECK: movaps	4(%esp), %xmm0
 }
 
 define void @test12() nounwind {

Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle.ll?rev=144037&r1=144036&r2=144037&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle.ll Mon Nov  7 17:08:21 2011
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq    %t | count 1
-; RUN: grep pshufd  %t | count 1
-; RUN: grep movupd  %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
 	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@
 	ret void
 }
 
+; CHECK: test_v2sd
+; CHECK: movups	8(%esp)
+; CHECK: movaps
 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
 	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@
 	ret void
 }
 
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
 	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
