[llvm-commits] [Review Request][PR9234] 59 patches of test/CodeGen/X86 for Windows hosts

NAKAMURA Takumi geek4civic at gmail.com
Thu Feb 17 00:04:50 PST 2011


Hello, guys!

First of all, please excuse me for posting such tedious patches. Please be patient.
(IIRC, this is take 3.)

http://llvm.org/bugs/show_bug.cgi?id=9234

On Windows hosts, llc -march=x86-64 infers the target triple as
-mtriple=x86_64-{mingw32|win32}, and many tests fail due to
differences in the Win64 ABI.
It would be easier to add an explicit -mtriple=x86_64-{darwin|linux} to
hide the failures.
However, some people have advised me not to hide such failures, so I have been
working on addressing them instead.

All patches convert the tests to use FileCheck.
Each patch falls into one of the following categories:

  - Add an explicit -mtriple=x86_64-linux.
    These tests do not make sense on Win64 (e.g. the red-zone test).
  - Add both -mtriple=x86_64-linux and -mtriple=x86_64-win32:
    - Relax expressions to match the argument registers of either ABI,
      e.g. %{{rdi|rcx}}.
    - Add a separate check pattern for Win64.
      I think these are the better approach.
    - Add an *as-is* check pattern for Win64.
      I gave up tweaking these; I would be happy if someone improves them.

Individual patches can be browsed at github.
https://github.com/chapuni/LLVM/compare/master...w32tests/20110217#files_bucket

I know reviewing these is tedious (for me too!), but please take a
look at them. Thank you!

...Takumi


 test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll   |    7 ++-
 test/CodeGen/X86/2007-07-18-Vector-Extract.ll   |    6 ++-
 test/CodeGen/X86/add.ll                         |   13 +++--
 test/CodeGen/X86/apm.ll                         |   11 ++++-
 test/CodeGen/X86/avoid-lea-scale2.ll            |    5 ++-
 test/CodeGen/X86/break-sse-dep.ll               |   15 +++---
 test/CodeGen/X86/byval.ll                       |    5 ++-
 test/CodeGen/X86/byval2.ll                      |   39 +++++++++++++-
 test/CodeGen/X86/byval3.ll                      |   42 ++++++++++++++-
 test/CodeGen/X86/byval4.ll                      |   42 ++++++++++++++-
 test/CodeGen/X86/byval5.ll                      |   42 ++++++++++++++-
 test/CodeGen/X86/coalescer-commute2.ll          |   21 +++++++-
 test/CodeGen/X86/codegen-prepare-extload.ll     |    5 +-
 test/CodeGen/X86/constant-pool-remat-0.ll       |   35 +++++++++++--
 test/CodeGen/X86/constant-pool-sharing.ll       |    7 ++-
 test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll |   12 +++-
 test/CodeGen/X86/ctpop-combine.ll               |    4 +-
 test/CodeGen/X86/fast-isel-cmp-branch.ll        |    7 ++-
 test/CodeGen/X86/fast-isel-gep.ll               |   17 +++---
 test/CodeGen/X86/fold-mul-lohi.ll               |    5 +-
 test/CodeGen/X86/gather-addresses.ll            |   23 ++++----
 test/CodeGen/X86/h-register-store.ll            |   32 +++++++++--
 test/CodeGen/X86/h-registers-0.ll               |   24 ++++++++-
 test/CodeGen/X86/i128-ret.ll                    |    6 ++-
 test/CodeGen/X86/i64-mem-copy.ll                |    8 ++-
 test/CodeGen/X86/lea-3.ll                       |   15 +++--
 test/CodeGen/X86/lea.ll                         |    9 ++--
 test/CodeGen/X86/lsr-overflow.ll                |    5 +-
 test/CodeGen/X86/lsr-reuse-trunc.ll             |    9 ++--
 test/CodeGen/X86/masked-iv-safe.ll              |   43 ++++++++++++---
 test/CodeGen/X86/memcmp.ll                      |   21 ++++----
 test/CodeGen/X86/mmx-copy-gprs.ll               |   10 +++-
 test/CodeGen/X86/movgs.ll                       |    7 ++-
 test/CodeGen/X86/optimize-max-3.ll              |   11 ++--
 test/CodeGen/X86/peep-vector-extract-concat.ll  |    6 ++-
 test/CodeGen/X86/pmulld.ll                      |   12 ++++-
 test/CodeGen/X86/pr9127.ll                      |    2 +-
 test/CodeGen/X86/red-zone.ll                    |    2 +-
 test/CodeGen/X86/remat-mov-0.ll                 |   15 +++---
 test/CodeGen/X86/scalar-min-max-fill-operand.ll |   34 +++++++++++-
 test/CodeGen/X86/sse-align-0.ll                 |   16 +++++-
 test/CodeGen/X86/sse-align-3.ll                 |   20 +++++++-
 test/CodeGen/X86/sse-align-7.ll                 |    8 +++-
 test/CodeGen/X86/sse-commute.ll                 |   10 +++-
 test/CodeGen/X86/sse_reload_fold.ll             |    5 +-
 test/CodeGen/X86/stdarg.ll                      |   16 +++++-
 test/CodeGen/X86/stride-nine-with-base-reg.ll   |   34 +++++++++++-
 test/CodeGen/X86/stride-reuse.ll                |   33 +++++++++++-
 test/CodeGen/X86/tailcallbyval64.ll             |   25 +++++++--
 test/CodeGen/X86/test-shrink.ll                 |   21 ++++----
 test/CodeGen/X86/use-add-flags.ll               |   15 +++---
 test/CodeGen/X86/v2f32.ll                       |   67 +++++++++++++++++++----
 test/CodeGen/X86/vec_cast.ll                    |    3 +-
 test/CodeGen/X86/vec_set-8.ll                   |    7 ++-
 test/CodeGen/X86/vec_shuffle-17.ll              |    7 ++-
 test/CodeGen/X86/vec_shuffle-37.ll              |    5 +-
 test/CodeGen/X86/widen_load-0.ll                |    8 +++-
 test/CodeGen/X86/x86-64-malloc.ll               |    4 +-
 test/CodeGen/X86/xor.ll                         |    9 ++--
 59 files changed, 749 insertions(+), 198 deletions(-)
-------------- next part --------------
diff --git a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
index de226a1..3458550 100644
--- a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
+++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
@@ -1,5 +1,8 @@
-; RUN: llc %s -o - -march=x86-64 | grep {(%rdi,%rax,8)}
-; RUN: llc %s -o - -march=x86-64 | not grep {addq.*8}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: {{addq.*8}}
+; CHECK:     ({{%rdi|%rcx}},%rax,8)
+; CHECK-NOT: {{addq.*8}}
 
 define void @foo(double* %y) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index 8625b27..6288c4a 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	(%rdi), %rax}
-; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	8(%rdi), %rax}
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck %s
+; CHECK: movq ([[A0:%rdi|%rcx]]), %rax
+; CHECK: movq 8([[A0]]), %rax
 define i64 @foo_0(<2 x i64>* %val) {
 entry:
         %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0            ; <i64*> [#uses=1]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 3ec5358..62c8980 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
 
 ; The immediate can be encoded in a smaller way if the
 ; instruction is a sub instead of an add.
@@ -43,7 +44,7 @@ overflow:
 ; X32-NEXT: jo
 
 ; X64:        test4:
-; X64:          addl	%esi, %edi
+; X64:          addl	%e[[A1:si|dx]], %e[[A0:di|cx]]
 ; X64-NEXT:	jo
 }
 
@@ -66,7 +67,7 @@ carry:
 ; X32-NEXT: jb
 
 ; X64:        test5:
-; X64:          addl	%esi, %edi
+; X64:          addl	%e[[A1]], %e[[A0]]
 ; X64-NEXT:	jb
 }
 
@@ -87,8 +88,8 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
 ; X32-NEXT: ret
         
 ; X64: test6:
-; X64:	shlq	$32, %rsi
-; X64:	leaq	(%rsi,%rdi), %rax
+; X64:	shlq	$32, %r[[A1]]
+; X64:	leaq	(%r[[A1]],%r[[A0]]), %rax
 ; X64:	ret
 }
 
@@ -98,7 +99,7 @@ define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
 }
 
 ; X64: test7:
-; X64: addl %esi, %eax
+; X64: addl %e[[A1]], %eax
 ; X64-NEXT: setb %dl
 ; X64-NEXT: ret
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index d0c64f2..b514cf6 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,10 +1,16 @@
-; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
 ; CHECK: leaq    (%rdi), %rax
 ; CHECK-NEXT: movl    %esi, %ecx
 ; CHECK-NEXT: monitor
+; WIN64: foo:
+; WIN64:      leaq    (%rcx), %rax
+; WIN64-NEXT: movl    %edx, %ecx
+; WIN64-NEXT: movl    %r8d, %edx
+; WIN64-NEXT: monitor
 define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
 entry:
   tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
@@ -17,6 +23,9 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
 ; CHECK: movl    %edi, %ecx
 ; CHECK-NEXT: movl    %esi, %eax
 ; CHECK-NEXT: mwait
+; WIN64: bar:
+; WIN64:      movl    %edx, %eax
+; WIN64-NEXT: mwait
 define void @bar(i32 %E, i32 %H) nounwind {
 entry:
   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
index 8003de2..545a3ce 100644
--- a/test/CodeGen/X86/avoid-lea-scale2.ll
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK: leal -2(%rdi,%rdi)
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; WIN64: leal -2(%rcx,%rcx)
 
 define i32 @foo(i32 %x) nounwind readnone {
   %t0 = shl i32 %x, 1
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 094cbc7..2dee575 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s
 
 define double @t1(float* nocapture %x) nounwind readonly ssp {
 entry:
 ; CHECK: t1:
-; CHECK: movss (%rdi), %xmm0
+; CHECK: movss ([[A0:%rdi|%rcx]]), %xmm0
 ; CHECK: cvtss2sd %xmm0, %xmm0
 
   %0 = load float* %x, align 4
@@ -14,7 +15,7 @@ entry:
 define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
 entry:
 ; CHECK: t2:
-; CHECK: cvtsd2ss (%rdi), %xmm0
+; CHECK: cvtsd2ss ([[A0]]), %xmm0
   %0 = load double* %x, align 8
   %1 = fptrunc double %0 to float
   ret float %1
@@ -23,7 +24,7 @@ entry:
 define float @squirtf(float* %x) nounwind {
 entry:
 ; CHECK: squirtf:
-; CHECK: movss (%rdi), %xmm0
+; CHECK: movss ([[A0]]), %xmm0
 ; CHECK: sqrtss %xmm0, %xmm0
   %z = load float* %x
   %t = call float @llvm.sqrt.f32(float %z)
@@ -33,7 +34,7 @@ entry:
 define double @squirt(double* %x) nounwind {
 entry:
 ; CHECK: squirt:
-; CHECK: movsd (%rdi), %xmm0
+; CHECK: movsd ([[A0]]), %xmm0
 ; CHECK: sqrtsd %xmm0, %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
@@ -43,7 +44,7 @@ entry:
 define float @squirtf_size(float* %x) nounwind optsize {
 entry:
 ; CHECK: squirtf_size:
-; CHECK: sqrtss (%rdi), %xmm0
+; CHECK: sqrtss ([[A0]]), %xmm0
   %z = load float* %x
   %t = call float @llvm.sqrt.f32(float %z)
   ret float %t
@@ -52,7 +53,7 @@ entry:
 define double @squirt_size(double* %x) nounwind optsize {
 entry:
 ; CHECK: squirt_size:
-; CHECK: sqrtsd (%rdi), %xmm0
+; CHECK: sqrtsd ([[A0]]), %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index ac0bc09..9b75537 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck -check-prefix=WIN64 %s
 ; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
 
 ; X86: movl	4(%esp), %eax
@@ -6,6 +7,8 @@
 
 ; X86-64: movq	8(%rsp), %rax
 
+; WIN64:  movq   (%rcx), %rax
+
 %struct.s = type { i64, i64, i64 }
 
 define i64 @f(%struct.s* byval %a) {
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 71129f5..c2ac7a5 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,40 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86    | grep rep.movsl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: {{^g:}}
+; W64:      subq    $184, %rsp
+; W64:      movq    %rsi, 176(%rsp)
+; W64:      movq    %rcx, 32(%rsp)
+; W64-NEXT: movq    %rdx, 40(%rsp)
+; W64-NEXT: movq    %r8, 48(%rsp)
+; W64-NEXT: leaq    32(%rsp), %rsi
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    176(%rsp), %rsi
+; W64-NEXT: addq    $184, %rsp
+; W64-NEXT: ret
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 504e0be..2f611ef 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,43 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: {{^g:}}
+; W64:      subq    $184, %rsp
+; W64-NEXT: movq    %rsi, 176(%rsp)
+; W64-NEXT: movl    %ecx, 32(%rsp)
+; W64-NEXT: movl    %edx, 36(%rsp)
+; W64-NEXT: movl    %r8d, 40(%rsp)
+; W64-NEXT: movl    %r9d, 44(%rsp)
+; W64-NEXT: movl    224(%rsp), %eax
+; W64-NEXT: movl    %eax, 48(%rsp)
+; W64-NEXT: movl    232(%rsp), %eax
+; W64-NEXT: movl    %eax, 52(%rsp)
+; W64-NEXT: leaq    32(%rsp), %rsi
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    176(%rsp), %rsi
+; W64-NEXT: addq    $184, %rsp
+; W64-NEXT: ret
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
                    i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 4db9d65..1ba23cf 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,43 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: {{^g:}}
+; W64:      subq    $184, %rsp
+; W64-NEXT: movq    %rsi, 176(%rsp)
+; W64-NEXT: movw    %cx, 32(%rsp)
+; W64-NEXT: movw    %dx, 34(%rsp)
+; W64-NEXT: movw    %r8w, 36(%rsp)
+; W64-NEXT: movw    %r9w, 38(%rsp)
+; W64-NEXT: movw    224(%rsp), %ax
+; W64-NEXT: movw    %ax, 40(%rsp)
+; W64-NEXT: movw    232(%rsp), %ax
+; W64-NEXT: movw    %ax, 42(%rsp)
+; W64-NEXT: leaq    32(%rsp), %rsi
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    176(%rsp), %rsi
+; W64-NEXT: addq    $184, %rsp
+; W64-NEXT: ret
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
                    i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index 69c115b..32c9cc7 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,43 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+; X64:     rep;movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: {{^g:}}
+; W64:      subq    $184, %rsp
+; W64:      movq    %rsi, 176(%rsp)
+; W64-NEXT: movb    %cl, 40(%rsp)
+; W64-NEXT: movb    %dl, 41(%rsp)
+; W64-NEXT: movb    %r8b, 42(%rsp)
+; W64-NEXT: movb    %r9b, 43(%rsp)
+; W64-NEXT: movb    224(%rsp), %al
+; W64-NEXT: movb    %al, 44(%rsp)
+; W64-NEXT: movb    232(%rsp), %al
+; W64-NEXT: movb    %al, 45(%rsp)
+; W64-NEXT: leaq    40(%rsp), %rsi
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    %rsi, %rcx
+; W64-NEXT: callq   f
+; W64-NEXT: movq    176(%rsp), %rsi
+; W64-NEXT: addq    $184, %rsp
+; W64-NEXT: ret
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
+; X32:     rep;movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 5d10bba..9ff7143 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,5 +1,22 @@
-; RUN: llc < %s -march=x86-64 | grep paddw | count 2
-; RUN: llc < %s -march=x86-64 | not grep mov
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     mov
+; CHECK:     paddw
+; CHECK-NOT:     mov
+; CHECK:     paddw
+; CHECK-NOT:     paddw
+; CHECK-NOT:     mov
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; WIN64:   {{^test1:}}
+; WIN64:      movdqa  (%rcx), %xmm0
+; WIN64-NEXT: paddw   (%rdx), %xmm0
+; WIN64:   {{^test2:}}
+; WIN64:      movdqa  (%rdx), %xmm0
+; WIN64-NEXT: paddw   (%rcx), %xmm0
+; WIN64:   {{^test3:}}
+; WIN64:      movdqa  (%rcx), %xmm1
+; WIN64-NEXT: pshufd  $27, %xmm1, %xmm0
+; WIN64-NEXT: addps   %xmm1, %xmm0
 
 ; The 2-addr pass should ensure that identical code is produced for these functions
 ; no extra copy should be generated.
diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll
index 9f57d53..14df815 100644
--- a/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
 ; rdar://7304838
 
 ; CodeGenPrepare should move the zext into the block with the load
 ; so that SelectionDAG can select it with the load.
 
-; CHECK: movzbl (%rdi), %eax
+; CHECK: movzbl ({{%rdi|%rcx}}), %eax
 
 define void @foo(i8* %p, i32* %q) {
 entry:
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 2a44463..e181237 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,34 @@
-; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
-; RUN: llc < %s -march=x86-64 -o /dev/null -stats  -info-output-file - | grep asm-printer | grep 6
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats  -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64:     LCPI
+; X64:     LCPI
+; X64:     LCPI
+; X64-NOT: LCPI
+
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X64stat
+; X64stat: 6 asm-printer
+
+; It seems rematerialization would not be needed on win64.
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: .LCPI0_0:
+; W64: array:
+; W64:      subq    $56, %rsp
+; W64-NEXT: movaps  %xmm6, 32(%rsp)
+; W64-NEXT: movss   .LCPI0_0(%rip), %xmm6
+; W64-NEXT: mulss   %xmm6, %xmm0
+; W64-NEXT: callq   qux
+; W64-NEXT: mulss   %xmm6, %xmm0
+; W64-NEXT: movaps  32(%rsp), %xmm6
+; W64-NEXT: addq    $56, %rsp
+; W64-NEXT: ret
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; X32:     LCPI
+; X32:     LCPI
+; X32:     LCPI
+; X32-NOT: LCPI
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X32stat
+; X32stat: 12 asm-printer
 
 declare float @qux(float %y)
 
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
index 33de576..f979945 100644
--- a/test/CodeGen/X86/constant-pool-sharing.ll
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; llc should share constant pool entries between this integer vector
 ; and this floating-point vector since they have the same encoding.
 
 ; CHECK:  LCPI0_0(%rip), %xmm0
-; CHECK:  movaps        %xmm0, (%rdi)
-; CHECK:  movaps        %xmm0, (%rsi)
+; CHECK:  movaps        %xmm0, ({{%rdi|%rcx}})
+; CHECK:  movaps        %xmm0, ({{%rsi|%rdx}})
 
 define void @foo(<4 x i32>* %p, <4 x float>* %q, i1 %t) nounwind {
 entry:
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 17cb2b3..7739909 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,6 +1,12 @@
-; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
-; RUN:   grep {asm-printer} | grep {Number of machine instrs printed} | grep 9
-; RUN: grep {leal	1(\%rsi),} %t
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=X64stat
+; X64stat: 9 asm-printer
+; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=W64stat
+; W64stat: 9 asm-printer
+
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64: leal 1(%rsi),
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64: leal 1(%rdx),
 
 define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize {
 entry:
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
index c957d38..6406cc7 100644
--- a/test/CodeGen/X86/ctpop-combine.ll
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -9,7 +9,7 @@ define i32 @test1(i64 %x) nounwind readnone {
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 ; CHECK: test1:
-; CHECK: leaq -1(%rdi)
+; CHECK: leaq -1([[A0:%rdi|%rcx]])
 ; CHECK-NEXT: testq
 ; CHECK-NEXT: setne
 ; CHECK: ret
@@ -22,7 +22,7 @@ define i32 @test2(i64 %x) nounwind readnone {
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 ; CHECK: test2:
-; CHECK: leaq -1(%rdi)
+; CHECK: leaq -1([[A0]])
 ; CHECK-NEXT: testq
 ; CHECK-NEXT: sete
 ; CHECK: ret
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
index 4ab1bc6..12312e8 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -1,13 +1,14 @@
-; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-win32 -asm-verbose=false < %s | FileCheck %s
 ; rdar://8337108
 
 ; Fast-isel shouldn't try to look through the compare because it's in a
 ; different basic block, so its operands aren't necessarily exported
 ; for cross-block usage.
 
-; CHECK: movb    %al, 7(%rsp)
+; CHECK: movb    %al, [[OFS:[0-9]*]](%rsp)
 ; CHECK: callq   {{_?}}bar
-; CHECK: movb    7(%rsp), %al
+; CHECK: movb    [[OFS]](%rsp), %al
 
 declare void @bar()
 
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 622a1ff..fbe0243 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -O0 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
 
 ; GEP indices are interpreted as signed integers, so they
@@ -13,8 +14,8 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind {
 ; X32:  	ret
 
 ; X64: test1:
-; X64:  	movslq	%edi, %rax
-; X64:  	movl	(%rsi,%rax,4), %eax
+; X64:  	movslq	%e[[A0:di|cx]], %rax
+; X64:  	movl	(%r[[A1:si|dx]],%rax,4), %eax
 ; X64:  	ret
 
 }
@@ -27,7 +28,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
 ; X32:  	ret
 
 ; X64: test2:
-; X64:  	movl	(%rsi,%rdi,4), %eax
+; X64:  	movl	(%r[[A1]],%r[[A0]],4), %eax
 ; X64:  	ret
 }
 
@@ -47,7 +48,7 @@ entry:
 ; X32:  	ret
 
 ; X64: test3:
-; X64:  	movb	-2(%rdi), %al
+; X64:  	movb	-2(%r[[A0]]), %al
 ; X64:  	ret
 
 }
@@ -80,9 +81,9 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
   %v11 = add i64 %B, %v10
   ret i64 %v11
 ; X64: test5:
-; X64: movslq	%esi, %rax
-; X64-NEXT: movq	(%rdi,%rax), %rax
-; X64-NEXT: addq	%rdx, %rax
+; X64: movslq	%e[[A1]], %rax
+; X64-NEXT: movq	(%r[[A0]],%rax), %rax
+; X64-NEXT: addq	%{{rdx|r8}}, %rax
 ; X64-NEXT: ret
 }
 
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 0351eca..5614c80 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86            | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT: lea
 
 @B = external global [1000 x i8], align 32
 @A = external global [1000 x i8], align 32
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 134ee28..4a6927f 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,20 +1,21 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s
 ; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
 ; bounce the vector off of cache rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK: andps    (%rdx), %xmm0
-; CHECK: movaps   %xmm0, -24(%rsp)
-; CHECK: movslq   -24(%rsp), %rax
-; CHECK: movsd    (%rdi,%rax,8), %xmm0
-; CHECK: movslq   -20(%rsp), %rax
-; CHECK: movhpd   (%rdi,%rax,8), %xmm0
-; CHECK: movslq   -16(%rsp), %rax
-; CHECK: movsd    (%rdi,%rax,8), %xmm1
-; CHECK: movslq   -12(%rsp), %rax
-; CHECK: movhpd   (%rdi,%rax,8), %xmm1
+; CHECK: andps    ([[H:%rdx|%r8]]), %xmm0
+; CHECK: movaps   %xmm0, {{(-24)?}}(%rsp)
+; CHECK: movslq   {{(-24)?}}(%rsp), %rax
+; CHECK: movsd    ([[P:%rdi|%rcx]],%rax,8), %xmm0
+; CHECK: movslq   {{-20|4}}(%rsp), %rax
+; CHECK: movhpd   ([[P]],%rax,8), %xmm0
+; CHECK: movslq   {{-16|8}}(%rsp), %rax
+; CHECK: movsd    ([[P]],%rax,8), %xmm1
+; CHECK: movslq   {{-12|12}}(%rsp), %rax
+; CHECK: movhpd   ([[P]],%rax,8), %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %a = load <4 x i32>* %i
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index d30e6b3..0adb2b1 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -1,9 +1,29 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep mov %t | count 6
-; RUN: grep {movb	%ah, (%rsi)} %t | count 3
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep mov %t | count 3
-; RUN: grep {movb	%ah, (%e} %t | count 3
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64-NOT:      mov
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
 
 ; Use h-register extract and store.
 
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index e84bb9a..4354d26 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X86-32
 
 ; Use h registers. On x86-64, codegen doesn't support general allocation
@@ -9,6 +10,10 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrq $8, %rdi
 ; X86-64: incb %dil
 
+; WIN64:  bar64:
+; WIN64:  shrq $8, %rcx
+; WIN64:  incb %cl
+
 ; X86-32: bar64:
 ; X86-32: incb %ah
   %t0 = lshr i64 %x, 8
@@ -23,6 +28,10 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
+; WIN64:  bar32:
+; WIN64:  shrl $8, %ecx
+; WIN64:  incb %cl
+
 ; X86-32: bar32:
 ; X86-32: incb %ah
   %t0 = lshr i32 %x, 8
@@ -37,6 +46,10 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
+; WIN64:  bar16:
+; WIN64:  shrl $8, %ecx
+; WIN64:  incb %cl
+
 ; X86-32: bar16:
 ; X86-32: incb %ah
   %t0 = lshr i16 %x, 8
@@ -51,6 +64,9 @@ define i64 @qux64(i64 inreg %x) nounwind {
 ; X86-64: movq %rdi, %rax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux64:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux64:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i64 %x, 8
@@ -63,6 +79,9 @@ define i32 @qux32(i32 inreg %x) nounwind {
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux32:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux32:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i32 %x, 8
@@ -75,6 +94,9 @@ define i16 @qux16(i16 inreg %x) nounwind {
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux16:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux16:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i16 %x, 8
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
index 277f428..264f07c 100644
--- a/test/CodeGen/X86/i128-ret.ll
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86-64 | grep {movq	8(%rdi), %rdx}
-; RUN: llc < %s -march=x86-64 | grep {movq	(%rdi), %rax}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: movq ([[A0:%rdi|%rcx]]), %rax
+; CHECK: movq 8([[A0]]), %rdx
 
 define i128 @test(i128 *%P) {
         %A = load i128* %P
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index 847e209..dce12ae 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,5 +1,9 @@
-; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+; RUN: llc < %s -mtriple=x86_64-linux   | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32   | FileCheck %s -check-prefix=X64
+; X64: movq ({{%rsi|%rdx}}), %r
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; X32: movsd (%eax), %xmm
 
 ; Uses movsd to load / store i64 values if sse2 is available.
 
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index 44413d6..040c5c2 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,16 +1,19 @@
-; RUN: llc < %s -march=x86-64 | grep {leal	(%rdi,%rdi,2), %eax}
-define i32 @test(i32 %a) {
-        %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1]
-        ret i32 %tmp2
-}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
-; RUN: llc < %s -march=x86-64 | grep {leaq	(,%rdi,4), %rax}
+; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax
 define i64 @test2(i64 %a) {
         %tmp2 = shl i64 %a, 2
 	%tmp3 = or i64 %tmp2, %a
         ret i64 %tmp3
 }
 
+; CHECK: leal ([[A0]],[[A0]],2), %eax
+define i32 @test(i32 %a) {
+        %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1]
+        ret i32 %tmp2
+}
+
 ;; TODO!  LEA instead of shift + copy.
 define i64 @test3(i64 %a) {
         %tmp2 = shl i64 %a, 3
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 22a9644..5421355 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 define i32 @test1(i32 %x) nounwind {
         %tmp1 = shl i32 %x, 3
         %tmp2 = add i32 %tmp1, 7
         ret i32 %tmp2
 ; CHECK: test1:
-; CHECK:    leal 7(,%rdi,8), %eax
+; CHECK:    leal 7(,[[A0:%rdi|%rcx]],8), %eax
 }
 
 
@@ -27,8 +28,8 @@ bb.nph:
 bb2:
 	ret i32 %x_offs
 ; CHECK: test2:
-; CHECK:	leal	-5(%rdi), %eax
+; CHECK:	leal	-5([[A0]]), %eax
 ; CHECK:	andl	$-4, %eax
 ; CHECK:	negl	%eax
-; CHECK:	leal	-4(%rdi,%rax), %eax
+; CHECK:	leal	-4([[A0]],%rax), %eax
 }
diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll
index 0b0214c..5bc4f7e 100644
--- a/test/CodeGen/X86/lsr-overflow.ll
+++ b/test/CodeGen/X86/lsr-overflow.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; The comparison uses the pre-inc value, which could lead LSR to
 ; try to compute -INT64_MIN.
 
 ; CHECK: movabsq $-9223372036854775808, %rax
-; CHECK: cmpq  %rax, %rbx
+; CHECK: cmpq  %rax,
 ; CHECK: sete  %al
 
 declare i64 @bar()
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index d1d7144..29f03d6 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
-; CHECK: movaps        (%rsi,%rax,4), %xmm3
-; CHECK: movaps        %xmm3, (%rdi,%rax,4)
+; CHECK: movaps        (%{{rsi|rdx}},%rax,4), %xmm3
+; CHECK: movaps        %xmm3, (%{{rdi|rcx}},%rax,4)
 ; CHECK: addq  $4, %rax
-; CHECK: cmpl  %eax, (%rdx)
+; CHECK: cmpl  %eax, (%{{rdx|r8}})
 ; CHECK-NEXT: jg
 
 define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0b4d73a..5f968b5 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,12 +1,37 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep and %t
-; RUN: not grep movz %t
-; RUN: not grep sar %t
-; RUN: not grep shl %t
-; RUN: grep add %t | count 2
-; RUN: grep inc %t | count 4
-; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     inc
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     dec
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     inc
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     dec
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     inc
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     addq $255, %r
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     addq $16777215, %r
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     lea
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     inc
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
+; CHECK:     lea
+; CHECK-NOT:     {{(inc|dec|lea)}}
+; CHECK-NOT:     {{(and|movz|sar|shl)}}
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index 36be1f3..f4bc1bb 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -1,4 +1,5 @@
-; RUN: llc %s -o - -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; This tests codegen time inlining/optimization of memcmp
 ; rdar://6480398
@@ -20,8 +21,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp2:
-; CHECK: movw    (%rdi), %ax
-; CHECK: cmpw    (%rsi), %ax
+; CHECK: movw    ([[A0:%rdi|%rcx]]), %ax
+; CHECK: cmpw    ([[A1:%rsi|%rdx]]), %ax
 }
 
 define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
@@ -37,7 +38,7 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp2a:
-; CHECK: cmpw    $28527, (%rdi)
+; CHECK: cmpw    $28527, ([[A0]])
 }
 
 
@@ -54,8 +55,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp4:
-; CHECK: movl    (%rdi), %eax
-; CHECK: cmpl    (%rsi), %eax
+; CHECK: movl    ([[A0]]), %eax
+; CHECK: cmpl    ([[A1]]), %eax
 }
 
 define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
@@ -71,7 +72,7 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp4a:
-; CHECK: cmpl $1869573999, (%rdi)
+; CHECK: cmpl $1869573999, ([[A0]])
 }
 
 define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
@@ -87,8 +88,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp8:
-; CHECK: movq    (%rdi), %rax
-; CHECK: cmpq    (%rsi), %rax
+; CHECK: movq    ([[A0]]), %rax
+; CHECK: cmpq    ([[A1]]), %rax
 }
 
 define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
@@ -105,6 +106,6 @@ return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp8a:
 ; CHECK: movabsq $8029759185026510694, %rax
-; CHECK: cmpq	%rax, (%rdi)
+; CHECK: cmpq	%rax, ([[A0]])
 }
 
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 3607043..6cb21ca 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+; RUN: llc < %s -mtriple=x86_64-linux   | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32   | FileCheck %s -check-prefix=X64
+; X64: movq ({{%rsi|%rdx}}), %rax
+; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s -check-prefix=X32
+; X32: movl 4(%eax),
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=XMM
+; XMM: movsd (%eax),
 
 ; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
 ; increases the places that need to use emms.
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 00190e8..97b7fe7 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
 entry:
@@ -30,7 +31,7 @@ entry:
 ; X32: calll	*%gs:(%eax)
 
 ; X64: test2:
-; X64: callq	*%gs:(%rdi)
+; X64: callq	*%gs:([[A0:%rdi|%rcx]])
 
 
 
@@ -50,7 +51,7 @@ entry:
 ; X32: 	ret
 
 ; X64: pmovsxwd_1:
-; X64:	pmovsxwd	%gs:(%rdi), %xmm0
+; X64:	pmovsxwd	%gs:([[A0]]), %xmm0
 ; X64:	ret
 }
 
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index f1e3c27..b90413d 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
@@ -40,13 +41,13 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;      CHECK:         jle
 ;  CHECK-NOT:         cmov
-;      CHECK:         xorl    %edi, %edi
+;      CHECK:         xorl    {{%edi, %edi|%ecx, %ecx}}
 ; CHECK-NEXT:         align
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT:         callq
-; CHECK-NEXT:         incl    %ebx
-; CHECK-NEXT:         cmpl    %r14d, %ebx
-; CHECK-NEXT:         movq    %rax, %rdi
+; CHECK-NEXT:         incl    [[BX:%ebx|%esi]]
+; CHECK-NEXT:         cmpl    [[R14:%r14d|%edi]], [[BX]]
+; CHECK-NEXT:         movq    %rax, %r{{di|cx}}
 ; CHECK-NEXT:         jl
 
 define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index e4ab2b5..1eed317 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd	\$3, %xmm0, %xmm0}
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse41 | FileCheck %s
+; CHECK: pshufd $3, %xmm0, %xmm0
+
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse41 | FileCheck %s -check-prefix=WIN64
+; WIN64: movss   12(%rcx), %xmm0
 
 define float @foo(<8 x float> %a) nounwind {
   %c = extractelement <8 x float> %a, i32 3
diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll
index 3ef5941..fc8791a 100644
--- a/test/CodeGen/X86/pmulld.ll
+++ b/test/CodeGen/X86/pmulld.ll
@@ -1,8 +1,13 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test1:
 ; CHECK-NEXT: pmulld
+
+; WIN64: test1:
+; WIN64:      movdqa  (%rcx), %xmm0
+; WIN64-NEXT: pmulld  (%rdx), %xmm0
   %C = mul <4 x i32> %A, %B
   ret <4 x i32> %C
 }
@@ -10,6 +15,11 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
 ; CHECK: test1a:
 ; CHECK-NEXT: pmulld
+
+; WIN64: test1a:
+; WIN64:      movdqa  (%rcx), %xmm0
+; WIN64-NEXT: pmulld  (%rdx), %xmm0
+
   %B = load <4 x i32>* %Bp
   %C = mul <4 x i32> %A, %B
   ret <4 x i32> %C
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
index 45b0c6c..d61e165 100644
--- a/test/CodeGen/X86/pr9127.ll
+++ b/test/CodeGen/X86/pr9127.ll
@@ -9,4 +9,4 @@ entry:
 }
 
 ; test that the load is folded.
-; CHECK: ucomisd	(%rdi), %xmm0
+; CHECK: ucomisd	(%{{rdi|rdx}}), %xmm0
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index 1ffb4e3..d936971 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
 ; CHECK: f0:
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
index 5fb445c..f89cd33 100644
--- a/test/CodeGen/X86/remat-mov-0.ll
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; CodeGen should remat the zero instead of spilling it.
 
 declare void @foo(i64 %p)
 
 ; CHECK: bar:
-; CHECK: xorl %edi, %edi
-; CHECK: xorl %edi, %edi
+; CHECK: xorl %e[[A0:di|cx]], %e
+; CHECK: xorl %e[[A0]], %e[[A0]]
 define void @bar() nounwind {
   call void @foo(i64 0)
   call void @foo(i64 0)
@@ -14,8 +15,8 @@ define void @bar() nounwind {
 }
 
 ; CHECK: bat:
-; CHECK: movq $-1, %rdi
-; CHECK: movq $-1, %rdi
+; CHECK: movq $-1, %r[[A0]]
+; CHECK: movq $-1, %r[[A0]]
 define void @bat() nounwind {
   call void @foo(i64 -1)
   call void @foo(i64 -1)
@@ -23,8 +24,8 @@ define void @bat() nounwind {
 }
 
 ; CHECK: bau:
-; CHECK: movl $1, %edi
-; CHECK: movl $1, %edi
+; CHECK: movl $1, %e[[A0]]
+; CHECK: movl $1, %e[[A0]]
 define void @bau() nounwind {
   call void @foo(i64 1)
   call void @foo(i64 1)
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index fe40758..cf0ebbf 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -1,6 +1,34 @@
-; RUN: llc < %s -march=x86-64 | grep min | count 1
-; RUN: llc < %s -march=x86-64 | grep max | count 1
-; RUN: llc < %s -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     mov
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     min
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     mov
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     max
+; CHECK-NOT:     {{(min|max|mov)}}
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; WIN64: {{^foo:}}
+; WIN64:      subq    $56, %rsp
+; WIN64-NEXT: movaps  %xmm6, 32(%rsp)
+; WIN64-NEXT: movaps  %xmm0, %xmm6
+; WIN64-NEXT: callq   bar
+; WIN64-NEXT: minss   %xmm6, %xmm0
+; WIN64-NEXT: movaps  32(%rsp), %xmm6
+; WIN64-NEXT: addq    $56, %rsp
+; WIN64-NEXT: ret
+
+; WIN64: {{^hem:}}
+; WIN64:      subq    $56, %rsp
+; WIN64-NEXT: movaps  %xmm6, 32(%rsp)
+; WIN64-NEXT: movaps  %xmm0, %xmm6
+; WIN64-NEXT: callq   bar
+; WIN64-NEXT: maxss   %xmm6, %xmm0
+; WIN64-NEXT: movaps  32(%rsp), %xmm6
+; WIN64-NEXT: addq    $56, %rsp
+; WIN64-NEXT: ret
 
 declare float @bar()
 
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index b12a87d..50fdd76 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -1,12 +1,26 @@
-; RUN: llc < %s -march=x86-64 | not grep mov
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     mov
+
+; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s -check-prefix=WIN64
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p
   %z = fmul <4 x float> %t, %x
   ret <4 x float> %z
 }
+
+; WIN64:  foo:
+; WIN64-NEXT: movaps  (%rcx), %xmm0
+; WIN64-NEXT: mulps   (%rdx), %xmm0
+; WIN64-NEXT: ret
+
 define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
   %t = load <2 x double>* %p
   %z = fmul <2 x double> %t, %x
   ret <2 x double> %z
 }
+
+; WIN64:  bar:
+; WIN64-NEXT: movapd  (%rcx), %xmm0
+; WIN64-NEXT: mulpd   (%rdx), %xmm0
+; WIN64-NEXT: ret
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index c42f7f0..03b0a29 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,10 +1,28 @@
-; RUN: llc < %s -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     movapd
+; CHECK:     movaps
+; CHECK-NOT:     movaps
+; CHECK:     movapd
+; CHECK-NOT:     movap
+
+; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s -check-prefix=WIN64
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   store <4 x float> %x, <4 x float>* %p
   ret void
 }
+
+; WIN64: foo:
+; WIN64-NEXT: movaps  (%rdx), %xmm0
+; WIN64-NEXT: movaps  %xmm0, (%rcx)
+; WIN64-NEXT: ret
+
 define void @bar(<2 x double>* %p, <2 x double> %x) nounwind {
   store <2 x double> %x, <2 x double>* %p
   ret void
 }
+
+; WIN64: bar:
+; WIN64-NEXT: movapd  (%rdx), %xmm0
+; WIN64-NEXT: movapd  %xmm0, (%rcx)
+; WIN64-NEXT: ret
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
index 5784481..5479ca2 100644
--- a/test/CodeGen/X86/sse-align-7.ll
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -1,4 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK:     movaps
+; CHECK-NOT:     movaps
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; WIN64: movaps (%rdx), [[XMM:%xmm[0-7]+]]
+; WIN64: movaps [[XMM]], (%rcx)
 
 define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   store <2 x i64> %x, <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-commute.ll b/test/CodeGen/X86/sse-commute.ll
index 38ed644..1a3390a 100644
--- a/test/CodeGen/X86/sse-commute.ll
+++ b/test/CodeGen/X86/sse-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
 
 ; Commute the comparison to avoid a move.
 ; PR7500.
@@ -17,4 +17,10 @@ entry:
   ret <2 x double> %tmp8
 }
 
-
+; RUN: llc -mtriple=x86_64-win32 -asm-verbose=false < %s | FileCheck %s -check-prefix=WIN64
+; WIN64: a:
+; WIN64-NEXT: movdqa  (%rdx), %xmm0
+; WIN64-NEXT: movdqa  (%rcx), %xmm1
+; WIN64-NEXT: pcmpeqd %xmm1, %xmm0
+; WIN64-NEXT: pand    %xmm1, %xmm0
+; WIN64-NEXT: ret
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index dc3d6fe..02399c4 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
-; RUN:   grep fail | count 1
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& FileCheck %s
+; CHECK: fail
+; CHECK-NOT: fail
 
 declare float @test_f(float %f)
 declare double @test_d(double %f)
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
index 9778fa1..29d91e8 100644
--- a/test/CodeGen/X86/stdarg.ll
+++ b/test/CodeGen/X86/stdarg.ll
@@ -1,4 +1,18 @@
-; RUN: llc < %s -march=x86-64 | grep {testb	\[%\]al, \[%\]al}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK: testb %al, %al
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; WIN64: {{^foo:}}
+; WIN64:      subq    $56, %rsp
+; WIN64-NEXT: movq    %r9, 88(%rsp)
+; WIN64-NEXT: movq    %r8, 80(%rsp)
+; WIN64-NEXT: movq    %rdx, 72(%rsp)
+; WIN64-NEXT: leaq    72(%rsp), %rax
+; WIN64-NEXT: movq    %rax, 32(%rsp)
+; WIN64-NEXT: leaq    32(%rsp), %rcx
+; WIN64-NEXT: callq   bar
+; WIN64-NEXT: addq    $56, %rsp
+; WIN64-NEXT: ret
 
 %struct.__va_list_tag = type { i32, i32, i8*, i8* }
 
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index f4847a3..59fa8b8 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux               | FileCheck %s
+; CHECK-NOT:     lea
 
 ; P should be sunk into the loop and folded into the address mode. There
 ; shouldn't be any lea instructions inside the loop.
@@ -35,3 +36,32 @@ return:
 	ret void
 }
 
+; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s -check-prefix=WIN64
+; WIN64: foo:
+; WIN64:      subq    $16, %rsp
+; WIN64-NEXT: movq    %rsi, (%rsp)
+; WIN64-NEXT: movq    %rdi, 8(%rsp)
+; WIN64-NEXT: testl   %ecx, %ecx
+; WIN64-NEXT: jle     .LBB0_3
+; WIN64-NEXT: xorl    %eax, %eax
+; WIN64-NEXT: leaq    B(%rip), %rsi
+; WIN64-NEXT: leaq    A(%rip), %rdi
+; WIN64-NEXT: leaq    P(%rip), %r8
+; WIN64-NEXT: leaq    Q(%rip), %r9
+; WIN64: .LBB0_2:
+; WIN64-NEXT: movslq  %eax, %rax
+; WIN64-NEXT: movb    (%rax,%rsi), %r10b
+; WIN64-NEXT: addb    %r10b, %r10b
+; WIN64-NEXT: movb    %r10b, (%rax,%rdi)
+; WIN64-NEXT: movslq  %edx, %rdx
+; WIN64-NEXT: movb    $17, (%rdx,%r8)
+; WIN64-NEXT: movb    $19, (%rdx,%r9)
+; WIN64-NEXT: addl    $9, %edx
+; WIN64-NEXT: incl    %eax
+; WIN64-NEXT: cmpl    %eax, %ecx
+; WIN64-NEXT: jne     .LBB0_2
+; WIN64-NEXT: .LBB0_3:
+; WIN64-NEXT: movq    8(%rsp), %rdi
+; WIN64-NEXT: movq    (%rsp), %rsi
+; WIN64-NEXT: addq    $16, %rsp
+; WIN64-NEXT: ret
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 5cbd895..55251d9 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86            | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
 @A = external global [1000 x float], align 32
@@ -28,3 +29,31 @@ bb:
 return:
 	ret void
 }
+
+; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s -check-prefix=WIN64
+; WIN64: foo:
+; WIN64:      subq    $16, %rsp
+; WIN64-NEXT: movq    %rsi, (%rsp)
+; WIN64-NEXT: movq    %rdi, 8(%rsp)
+; WIN64-NEXT: testl   %ecx, %ecx
+; WIN64-NEXT: jle     .LBB0_3
+; WIN64-NEXT: xorl    %eax, %eax
+; WIN64-NEXT: movl    $64, %edx
+; WIN64-NEXT: leaq    B(%rip), %rsi
+; WIN64-NEXT: leaq    A(%rip), %rdi
+; WIN64-NEXT: leaq    P(%rip), %r8
+; WIN64: .LBB0_2:
+; WIN64-NEXT: movslq  %eax, %rax
+; WIN64-NEXT: movss   (%rsi,%rax,4), %xmm0
+; WIN64-NEXT: addss   %xmm0, %xmm0
+; WIN64-NEXT: movss   %xmm0, (%rdi,%rax,4)
+; WIN64-NEXT: movl    %edx, (%r8,%rax,4)
+; WIN64-NEXT: addl    $2, %edx
+; WIN64-NEXT: incl    %eax
+; WIN64-NEXT: cmpl    %eax, %ecx
+; WIN64-NEXT: jne     .LBB0_2
+; WIN64-NEXT: .LBB0_3:
+; WIN64-NEXT: movq    8(%rsp), %rdi
+; WIN64-NEXT: movq    (%rsp), %rsi
+; WIN64-NEXT: addq    $16, %rsp
+; WIN64-NEXT: ret
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7c685b8..1b1efe7 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,15 +1,30 @@
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep TAILCALL
+; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+
+; FIXME: Win64 does not support byval.
+
+; Expect the entry point.
+; CHECK: tailcaller:
+
 ; Expect 2 rep;movs because of tail call byval lowering.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
+; CHECK: rep;
+; CHECK: rep;
+
 ; A sequence of copyto/copyfrom virtual registers is used to deal with byval
 ; lowering appearing after moving arguments to registers. The following two
 ; checks verify that the register allocator changes those sequences to direct
 ; moves to argument register where it can (for registers that are not used in 
 ; byval lowering - not rsi, not rdi, not rcx).
 ; Expect argument 4 to be moved directly to register edx.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {7} | grep edx
+; CHECK: movl $7, %edx
+
 ; Expect argument 6 to be moved directly to register r8.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {17} | grep r8
+; CHECK: movl $17, %r8d
+
+; Expect not call but jmp to @tailcallee.
+; CHECK: jmp tailcallee
+
+; Expect the trailer.
+; CHECK: .size tailcaller
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
@@ -25,5 +40,3 @@ entry:
         %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* %a byval, i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
         ret i64 %tmp4
 }
-
-
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
index 1d63693..5bc28ec 100644
--- a/test/CodeGen/X86/test-shrink.ll
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=CHECK-64
 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
 
 ; CHECK-64: g64xh:
-; CHECK-64:   testb $8, %ah
+; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
 ; CHECK-32: g64xh:
 ; CHECK-32:   testb $8, %ah
@@ -19,7 +20,7 @@ no:
   ret void
 }
 ; CHECK-64: g64xl:
-; CHECK-64:   testb $8, %dil
+; CHECK-64:   testb $8, [[A0L:%dil|%cl]]
 ; CHECK-64:   ret
 ; CHECK-32: g64xl:
 ; CHECK-32:   testb $8, %al
@@ -36,7 +37,7 @@ no:
   ret void
 }
 ; CHECK-64: g32xh:
-; CHECK-64:   testb $8, %ah
+; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
 ; CHECK-32: g32xh:
 ; CHECK-32:   testb $8, %ah
@@ -53,7 +54,7 @@ no:
   ret void
 }
 ; CHECK-64: g32xl:
-; CHECK-64:   testb $8, %dil
+; CHECK-64:   testb $8, [[A0L]]
 ; CHECK-64:   ret
 ; CHECK-32: g32xl:
 ; CHECK-32:   testb $8, %al
@@ -70,7 +71,7 @@ no:
   ret void
 }
 ; CHECK-64: g16xh:
-; CHECK-64:   testb $8, %ah
+; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
 ; CHECK-32: g16xh:
 ; CHECK-32:   testb $8, %ah
@@ -87,7 +88,7 @@ no:
   ret void
 }
 ; CHECK-64: g16xl:
-; CHECK-64:   testb $8, %dil
+; CHECK-64:   testb $8, [[A0L]]
 ; CHECK-64:   ret
 ; CHECK-32: g16xl:
 ; CHECK-32:   testb $8, %al
@@ -104,7 +105,7 @@ no:
   ret void
 }
 ; CHECK-64: g64x16:
-; CHECK-64:   testw $-32640, %di
+; CHECK-64:   testw $-32640, %[[A0W:di|cx]]
 ; CHECK-64:   ret
 ; CHECK-32: g64x16:
 ; CHECK-32:   testw $-32640, %ax
@@ -121,7 +122,7 @@ no:
   ret void
 }
 ; CHECK-64: g32x16:
-; CHECK-64:   testw $-32640, %di
+; CHECK-64:   testw $-32640, %[[A0W]]
 ; CHECK-64:   ret
 ; CHECK-32: g32x16:
 ; CHECK-32:   testw $-32640, %ax
@@ -138,7 +139,7 @@ no:
   ret void
 }
 ; CHECK-64: g64x32:
-; CHECK-64:   testl $268468352, %edi
+; CHECK-64:   testl $268468352, %e[[A0W]]
 ; CHECK-64:   ret
 ; CHECK-32: g64x32:
 ; CHECK-32:   testl $268468352, %eax
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index c2f0c23..4aed7f0 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -o - | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -o - | FileCheck %s
 
 ; Reuse the flags value from the add instructions instead of emitting separate
 ; testl instructions.
@@ -6,10 +7,10 @@
 ; Use the flags on the add.
 
 ; CHECK: test1:
-;      CHECK: addl    (%rdi), %esi
-; CHECK-NEXT: movl    %edx, %eax
-; CHECK-NEXT: cmovnsl %ecx, %eax
-; CHECK-NEXT: ret
+;      CHECK: addl    {{\(%rdi\), %esi|\(%rcx\), %edx}}
+; CHECK-NEXT: movl    {{%edx|%r8d}}, %eax
+; CHECK-NEXT: cmovnsl {{%ecx|%r9d}}, %eax
+; CHECK:      ret
 
 define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
 	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
@@ -25,7 +26,7 @@ declare void @foo(i32)
 ; other use. A simple test is better.
 
 ; CHECK: test2:
-; CHECK: testb   $16, %dil
+; CHECK: testb   $16, {{%dil|%cl}}
 
 define void @test2(i32 %x) nounwind {
   %y = and i32 %x, 16
@@ -41,7 +42,7 @@ false:
 ; Do use the flags result of the and here, since the and has another use.
 
 ; CHECK: test3:
-;      CHECK: andl    $16, %edi
+;      CHECK: andl    $16, {{%edi|%ecx}}
 ; CHECK-NEXT: jne
 
 define void @test3(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index 76c3fdf..6d14099 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
 ; RUN: llc < %s -mcpu=yonah -march=x86 -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32
 
 ; PR7518
@@ -15,6 +16,13 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
 ; X64-NEXT: movss	%xmm1, (%rdi)
 ; X64-NEXT: ret
 
+; W64: test1:
+; W64-NEXT: movdqa  (%rcx), %xmm0
+; W64-NEXT: pshufd  $1, %xmm0, %xmm1
+; W64-NEXT: addss   %xmm0, %xmm1
+; W64-NEXT: movss   %xmm1, (%rdx)
+; W64-NEXT: ret
+
 ; X32: test1:
 ; X32-NEXT: pshufd	$1, %xmm0, %xmm1
 ; X32-NEXT: addss	%xmm0, %xmm1
@@ -31,6 +39,14 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
 ; X64: test2:
 ; X64-NEXT: addps	%xmm1, %xmm0
 ; X64-NEXT: ret
+
+; W64: test2:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   (%rdx), %xmm0
+; W64-NEXT: ret
+
+; X32: test2:
+; X32:      addps	%xmm1, %xmm0
 }
 
 
@@ -38,17 +54,35 @@ define <2 x float> @test3(<4 x float> %A) nounwind {
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
 	ret <2 x float> %C
-; CHECK: test3:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test3:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test3:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test3:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 define <2 x float> @test4(<2 x float> %A) nounwind {
 	%C = fadd <2 x float> %A, %A
 	ret <2 x float> %C
-; CHECK: test4:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test4:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test4:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test4:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 define <4 x float> @test5(<4 x float> %A) nounwind {
@@ -61,10 +95,21 @@ BB:
 	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 	ret <4 x float> %E
         
-; CHECK: _test5:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test5:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test5:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test5:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index 95289c9..6263d58 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=core2
-
+; PR8311
+; XFAIL: mingw,win32
 
 define <8 x i32> @a(<8 x i16> %a) nounwind {
   %c = sext <8 x i16> %a to <8 x i32>
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index 9697f11..66056d0 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86-64 | not grep movsd
-; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: movsd
+; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK-NOT: movsd
 
 define <2 x i64> @test(i64 %i) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index 9c33abb..ebc8c5b 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
-; RUN: llc < %s -march=x86-64 | not grep xor
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: xor
+; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK-NOT: xor
 ; PR2108
 
 define <2 x i64> @doload64(i64 %x) nounwind  {
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index b090930..2efdb14 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 ; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
-; CHECK: movaps  (%rdi), %xmm0
+; CHECK: movaps  ({{%rdi|%rcx}}), %xmm0
 ; CHECK-NEXT: movaps  %xmm0, %xmm1
 ; CHECK-NEXT: movlps  (%rax), %xmm1
 ; CHECK-NEXT: shufps  $36, %xmm1, %xmm0
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index f6c4af0..82c8252 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; PR4891
 
 ; Both loads should happen before either store.
@@ -8,6 +9,11 @@
 ; CHECK: movl  %ecx, (%rdi)
 ; CHECK: movl  %eax, (%rsi)
 
+; WIN64: movl  (%rcx), %eax
+; WIN64: movl  (%rdx), %esi
+; WIN64: movl  %esi, (%rcx)
+; WIN64: movl  %eax, (%rdx)
+
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
   %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
index b4f1fa6..4aa0ec3 100644
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: shll $3, {{%edi|%ecx}}
 ; PR3829
 ; The generated code should multiply by 3 (sizeof i8*) as an i32,
 ; not as an i64!
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 6c623cb..b90d81a 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2  | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
 
 ; Though it is undefined, we want xor undef,undef to produce zero.
 define <4 x i32> @test1() nounwind {
@@ -28,9 +29,9 @@ entry:
         ret i32 %tmp4
         
 ; X64: test3:
-; X64:	notl	%esi
-; X64:	andl	%edi, %esi
-; X64:	movl	%esi, %eax
+; X64:	notl	[[A1:%esi|%edx]]
+; X64:	andl	[[A0:%edi|%ecx]], [[A1]]
+; X64:	movl	[[A1]], %eax
 ; X64:	shrl	%eax
 ; X64:	ret
 


More information about the llvm-commits mailing list