[llvm] r294824 - [X86] Don't base domain decisions on VEXTRACTF128/VINSERTF128 if only AVX1 is available.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 10 21:32:58 PST 2017


Author: ctopper
Date: Fri Feb 10 23:32:57 2017
New Revision: 294824

URL: http://llvm.org/viewvc/llvm-project?rev=294824&view=rev
Log:
[X86] Don't base domain decisions on VEXTRACTF128/VINSERTF128 if only AVX1 is available.

The execution dependency fix pass seems to prefer FP instructions, even when most of the consuming code is integer, if a vextractf128 instruction produced the register. Without AVX2 we don't have the corresponding integer instruction available.

This patch reports GenericDomain for these instructions when AVX2 is not supported so that they are ignored by domain fixing. If AVX2 is supported we'll report the correct domain and allow them to switch between integer and FP.

Overall I think this produces better results in the modified test cases.
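
As a rough illustration (a hypothetical reduced case, not the verbatim body of any modified test), the affected pattern looks like the sketch below; with only AVX1, the aligned 256-bit integer load feeding the split vpaddd adds can now stay in the integer domain, matching the vmovaps -> vmovdqa changes in the test diffs that follow:

    ; example.ll -- llc -mtriple=x86_64-unknown-unknown -mattr=+avx
    define <8 x i32> @add_v8i32_mem(<8 x i32>* %p, <8 x i32> %x) {
      ; The ymm load is split by vextractf128 before the two 128-bit vpaddd;
      ; since vextractf128 is now GenericDomain without AVX2, it no longer
      ; drags the load into the FP domain (vmovaps).
      %v = load <8 x i32>, <8 x i32>* %p, align 32
      %r = add <8 x i32> %v, %x
      ret <8 x i32> %r
    }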

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
    llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
    llvm/trunk/test/CodeGen/X86/oddshuffles.ll
    llvm/trunk/test/CodeGen/X86/psubus.ll
    llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
    llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
    llvm/trunk/test/CodeGen/X86/vec_fptrunc.ll
    llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
    llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll
    llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
    llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
    llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
    llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc.ll
    llvm/trunk/test/CodeGen/X86/viabs.ll
    llvm/trunk/test/CodeGen/X86/vselect-minmax.ll
    llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Feb 10 23:32:57 2017
@@ -8863,10 +8863,6 @@ static const uint16_t ReplaceableInstrsA
   { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr     },
   { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm    },
   { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    },
-  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
-  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
-  { X86::VINSERTF128rm,  X86::VINSERTF128rm,  X86::VINSERTI128rm },
-  { X86::VINSERTF128rr,  X86::VINSERTF128rr,  X86::VINSERTI128rr },
   { X86::VPERM2F128rm,   X86::VPERM2F128rm,   X86::VPERM2I128rm },
   { X86::VPERM2F128rr,   X86::VPERM2F128rr,   X86::VPERM2I128rr },
   { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
@@ -8878,6 +8874,14 @@ static const uint16_t ReplaceableInstrsA
   { X86::VBROADCASTF128,  X86::VBROADCASTF128,  X86::VBROADCASTI128 },
 };
 
+static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
+  //PackedSingle       PackedDouble       PackedInt
+  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+  { X86::VINSERTF128rm,  X86::VINSERTF128rm,  X86::VINSERTI128rm },
+  { X86::VINSERTF128rr,  X86::VINSERTF128rr,  X86::VINSERTI128rr },
+};
+
 static const uint16_t ReplaceableInstrsAVX512[][4] = {
   // Two integer columns for 64-bit and 32-bit elements.
   //PackedSingle        PackedDouble        PackedInt             PackedInt
@@ -9139,6 +9143,12 @@ X86InstrInfo::getExecutionDomain(const M
       validDomains = 0xe;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
       validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+    } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
+      // Insert/extract instructions should only affect domain if AVX2
+      // is enabled.
+      if (!Subtarget.hasAVX2())
+        return std::make_pair(0, 0);
+      validDomains = 0xe;
     } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
       validDomains = 0xe;
     } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
@@ -9167,6 +9177,11 @@ void X86InstrInfo::setExecutionDomain(Ma
            "256-bit vector operations only available in AVX2");
     table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
   }
+  if (!table) { // try the other table
+    assert(Subtarget.hasAVX2() &&
+           "256-bit insert/extract only available in AVX2");
+    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
+  }
   if (!table) { // try the AVX512 table
     assert(Subtarget.hasAVX512() && "Requires AVX-512");
     table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);

Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll Fri Feb 10 23:32:57 2017
@@ -625,7 +625,7 @@ define <4 x double> @merge_4f64_f64_34uz
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX1-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -650,7 +650,7 @@ define <4 x double> @merge_4f64_f64_34uz
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32-AVX-NEXT:    retl
   %ptr0 = getelementptr inbounds double, double* %ptr, i64 3

Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Fri Feb 10 23:32:57 2017
@@ -752,7 +752,7 @@ define <8 x i32> @test_arg_v8i32(<8 x i3
 ;
 ; AVX1-LABEL: test_arg_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm1
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
@@ -804,7 +804,7 @@ define <4 x i64> @test_arg_v4i64(<4 x i6
 ;
 ; AVX1-LABEL: test_arg_v4i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm1
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
@@ -835,7 +835,7 @@ define <16 x i16> @test_arg_v16i16(<16 x
 ;
 ; AVX1-LABEL: test_arg_v16i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm1
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
@@ -866,7 +866,7 @@ define <32 x i8> @test_arg_v32i8(<32 x i
 ;
 ; AVX1-LABEL: test_arg_v32i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm1
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
@@ -925,8 +925,8 @@ define <16 x i32> @test_arg_v16i32(<16 x
 ;
 ; AVX1-LABEL: test_arg_v16i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm2
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
+; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
@@ -989,8 +989,8 @@ define <8 x i64> @test_arg_v8i64(<8 x i6
 ;
 ; AVX1-LABEL: test_arg_v8i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm2
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
+; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm4
@@ -1029,8 +1029,8 @@ define <32 x i16> @test_arg_v32i16(<32 x
 ;
 ; AVX1-LABEL: test_arg_v32i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm2
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
+; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT:    vpaddw %xmm5, %xmm4, %xmm4
@@ -1081,8 +1081,8 @@ define <64 x i8> @test_arg_v64i8(<64 x i
 ;
 ; AVX1-LABEL: test_arg_v64i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm2
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
+; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT:    vpaddb %xmm5, %xmm4, %xmm4

Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Fri Feb 10 23:32:57 2017
@@ -923,7 +923,7 @@ define void @interleave_24i16_out(<24 x
 ; AVX1-LABEL: interleave_24i16_out:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovdqu 32(%rdi), %xmm0
-; AVX1-NEXT:    vmovups (%rdi), %ymm1
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm1[0],xmm2[1],xmm1[2,3],xmm2[4],xmm1[5,6],xmm2[7]
 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[0,1,6,7,12,13,2,3,8,9,14,15,12,13,14,15]
@@ -1445,8 +1445,8 @@ define <2 x double> @wrongorder(<4 x dou
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
-; AVX1-NEXT:    vmovapd %ymm1, 32(%rdi)
-; AVX1-NEXT:    vmovapd %ymm1, (%rdi)
+; AVX1-NEXT:    vmovaps %ymm1, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Fri Feb 10 23:32:57 2017
@@ -213,7 +213,7 @@ define void @test7(i16* nocapture %head)
 ;
 ; AVX1-LABEL: test7:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
@@ -257,13 +257,13 @@ define void @test8(i16* nocapture %head)
 ;
 ; AVX1-LABEL: test8:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [65534,65534,65534,65534,65534,65534,65534,65534]
 ; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
@@ -310,7 +310,7 @@ define void @test9(i16* nocapture %head,
 ;
 ; AVX1-LABEL: test9:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vmovd %esi, %xmm2
 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
@@ -364,7 +364,7 @@ define void @test10(i8* nocapture %head)
 ;
 ; AVX1-LABEL: test10:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
@@ -409,13 +409,13 @@ define void @test11(i8* nocapture %head)
 ;
 ; AVX1-LABEL: test11:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
 ; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
@@ -475,7 +475,7 @@ define void @test12(i8* nocapture %head,
 ;
 ; AVX1-LABEL: test12:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vmovd %esi, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
@@ -589,7 +589,7 @@ define void @test13(i16* nocapture %head
 ;
 ; AVX1-LABEL: test13:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rsi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
@@ -598,7 +598,7 @@ define void @test13(i16* nocapture %head
 ; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm6, %xmm4
 ; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
@@ -791,8 +791,8 @@ define void @test14(i8* nocapture %head,
 ;
 ; AVX1-LABEL: test14:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rsi), %ymm0
-; AVX1-NEXT:    vmovups 32(%rsi), %ymm1
+; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
+; AVX1-NEXT:    vmovdqu 32(%rsi), %ymm1
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm8 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm9 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -803,17 +803,17 @@ define void @test14(i8* nocapture %head,
 ; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm6, %xmm10, %xmm7
-; AVX1-NEXT:    vxorps %xmm6, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm11 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; AVX1-NEXT:    vpshufb %xmm11, %xmm3, %xmm12
 ; AVX1-NEXT:    vpxor %xmm6, %xmm9, %xmm7
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm6, %xmm4, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm6, %xmm8, %xmm7
-; AVX1-NEXT:    vxorps %xmm6, %xmm0, %xmm6
+; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpacksswb %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm11, %xmm3, %xmm3
@@ -966,7 +966,7 @@ define void @test15(i16* nocapture %head
 ;
 ; AVX1-LABEL: test15:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rsi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
@@ -975,7 +975,7 @@ define void @test15(i16* nocapture %head
 ; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
@@ -1096,7 +1096,7 @@ define void @test16(i16* nocapture %head
 ;
 ; AVX1-LABEL: test16:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovups (%rsi), %ymm0
+; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
@@ -1105,7 +1105,7 @@ define void @test16(i16* nocapture %head
 ; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Fri Feb 10 23:32:57 2017
@@ -1298,7 +1298,7 @@ define void @fallback_broadcast_v4i64_to
 ; X32-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X32-AVX1:       ## BB#0: ## %entry
 ; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm4 = [1,0,2,0,3,0,4,0]
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} ymm4 = [1,0,2,0,3,0,4,0]
 ; X32-AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
 ; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm0, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll Fri Feb 10 23:32:57 2017
@@ -254,7 +254,7 @@ define <32 x i8> @combine_16_byte_loads_
 define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x double> %x) {
 ; AVXSLOW-LABEL: combine_16_byte_loads_double:
 ; AVXSLOW:       # BB#0:
-; AVXSLOW-NEXT:    vmovupd 144(%rdi), %xmm1
+; AVXSLOW-NEXT:    vmovups 144(%rdi), %xmm1
 ; AVXSLOW-NEXT:    vinsertf128 $1, 160(%rdi), %ymm1, %ymm1
 ; AVXSLOW-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
 ; AVXSLOW-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vec_fptrunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fptrunc.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fptrunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fptrunc.ll Fri Feb 10 23:32:57 2017
@@ -102,7 +102,7 @@ define void @fptrunc_frommem8(<8 x doubl
 ; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
 ; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
 ; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX-NEXT:    vmovupd %ymm0, (%eax)
+; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
 ; X32-AVX-NEXT:    vzeroupper
 ; X32-AVX-NEXT:    retl
 ;
@@ -123,7 +123,7 @@ define void @fptrunc_frommem8(<8 x doubl
 ; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
 ; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
 ; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX-NEXT:    vmovupd %ymm0, (%rsi)
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Fri Feb 10 23:32:57 2017
@@ -2723,7 +2723,7 @@ define <4 x double> @sitofp_load_4i64_to
 ;
 ; AVX1-LABEL: sitofp_load_4i64_to_4f64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
@@ -3130,7 +3130,7 @@ define <4 x double> @uitofp_load_4i64_to
 ;
 ; AVX1-LABEL: uitofp_load_4i64_to_4f64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]

Modified: llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_minmax_uint.ll Fri Feb 10 23:32:57 2017
@@ -167,13 +167,13 @@ define <4 x i64> @max_gt_v4i64(<4 x i64>
 ; AVX1-LABEL: max_gt_v4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -604,15 +604,15 @@ define <4 x i64> @max_ge_v4i64(<4 x i64>
 ; AVX1-LABEL: max_ge_v4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -1031,13 +1031,13 @@ define <4 x i64> @min_lt_v4i64(<4 x i64>
 ; AVX1-LABEL: min_lt_v4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -1467,15 +1467,15 @@ define <4 x i64> @min_le_v4i64(<4 x i64>
 ; AVX1-LABEL: min_le_v4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2

Modified: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll Fri Feb 10 23:32:57 2017
@@ -613,8 +613,8 @@ define <32 x i8> @test_bitreverse_v32i8(
 ; AVX1-LABEL: test_bitreverse_v32i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
@@ -622,7 +622,7 @@ define <32 x i8> @test_bitreverse_v32i8(
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm5, %xmm1
 ; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
@@ -1361,8 +1361,8 @@ define <64 x i8> @test_bitreverse_v64i8(
 ; AVX1-LABEL: test_bitreverse_v64i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm2
@@ -1370,7 +1370,7 @@ define <64 x i8> @test_bitreverse_v64i8(
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm6, %xmm2
 ; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm4
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
@@ -1378,13 +1378,13 @@ define <64 x i8> @test_bitreverse_v64i8(
 ; AVX1-NEXT:    vpor %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm6, %xmm2
 ; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm4
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Fri Feb 10 23:32:57 2017
@@ -10187,24 +10187,24 @@ define <32 x i1> @test_cmp_v32i64(<32 x
 ; AVX1-NEXT:    movq %rsp, %rbp
 ; AVX1-NEXT:    andq $-32, %rsp
 ; AVX1-NEXT:    subq $32, %rsp
-; AVX1-NEXT:    vmovaps 240(%rbp), %ymm8
+; AVX1-NEXT:    vmovdqa 240(%rbp), %ymm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm8, %xmm9
 ; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm10
 ; AVX1-NEXT:    vpcmpgtq %xmm9, %xmm10, %xmm9
-; AVX1-NEXT:    vmovaps 208(%rbp), %ymm10
+; AVX1-NEXT:    vmovdqa 208(%rbp), %ymm10
 ; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm7, %xmm7
 ; AVX1-NEXT:    vpacksswb %xmm9, %xmm7, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm10, %xmm9
 ; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm9, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm10, %xmm6, %xmm6
-; AVX1-NEXT:    vmovaps 176(%rbp), %ymm9
+; AVX1-NEXT:    vmovdqa 176(%rbp), %ymm9
 ; AVX1-NEXT:    vpacksswb %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpacksswb %xmm8, %xmm6, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm7
 ; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
-; AVX1-NEXT:    vmovaps 144(%rbp), %ymm10
+; AVX1-NEXT:    vmovdqa 144(%rbp), %ymm10
 ; AVX1-NEXT:    vpcmpgtq %xmm9, %xmm5, %xmm5
 ; AVX1-NEXT:    vpacksswb %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vextractf128 $1, %ymm10, %xmm6
@@ -10212,26 +10212,26 @@ define <32 x i1> @test_cmp_v32i64(<32 x
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm10, %xmm4, %xmm4
 ; AVX1-NEXT:    vpacksswb %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vmovaps 112(%rbp), %ymm6
+; AVX1-NEXT:    vmovdqa 112(%rbp), %ymm6
 ; AVX1-NEXT:    vpacksswb %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vpacksswb %xmm8, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm5
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vmovaps 80(%rbp), %ymm7
+; AVX1-NEXT:    vmovdqa 80(%rbp), %ymm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm5
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vmovaps 48(%rbp), %ymm6
+; AVX1-NEXT:    vmovdqa 48(%rbp), %ymm6
 ; AVX1-NEXT:    vpacksswb %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
-; AVX1-NEXT:    vmovaps 16(%rbp), %ymm5
+; AVX1-NEXT:    vmovdqa 16(%rbp), %ymm5
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3

Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Fri Feb 10 23:32:57 2017
@@ -4315,7 +4315,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %r14d
 ; AVX1-NEXT:    orl %ebx, %r14d
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -4462,7 +4462,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %r14d
 ; AVX1-NEXT:    orl %ebx, %r14d
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -4616,7 +4616,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %r14d
 ; AVX1-NEXT:    orl %ebx, %r14d
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -4774,7 +4774,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %r15d
 ; AVX1-NEXT:    orl %ebx, %r15d
-; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -4799,7 +4799,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %r15d
 ; AVX1-NEXT:    orl %ebx, %r15d
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -5077,7 +5077,7 @@ define void @store_cvt_4f64_to_4i16(<4 x
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movl %eax, %r14d
-; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -5235,7 +5235,7 @@ define void @store_cvt_4f64_to_8i16_unde
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %ebx
 ; AVX1-NEXT:    orl %ebp, %ebx
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -5406,7 +5406,7 @@ define void @store_cvt_4f64_to_8i16_zero
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movzwl %ax, %ebx
 ; AVX1-NEXT:    orl %ebp, %ebx
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -5576,7 +5576,7 @@ define void @store_cvt_8f64_to_8i16(<8 x
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -5587,7 +5587,7 @@ define void @store_cvt_8f64_to_8i16(<8 x
 ; AVX1-NEXT:    # xmm0 = mem[1,0]
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movl %eax, %r12d
-; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll Fri Feb 10 23:32:57 2017
@@ -87,7 +87,7 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: test_div7_8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
@@ -348,7 +348,7 @@ define <4 x i64> @test_rem7_4i64(<4 x i6
 define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: test_rem7_8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Fri Feb 10 23:32:57 2017
@@ -359,7 +359,7 @@ define <4 x i64> @test_rem7_4i64(<4 x i6
 define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: test_rem7_8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
+; AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4

Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll Fri Feb 10 23:32:57 2017
@@ -11,8 +11,8 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX1-LABEL: testv4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
@@ -37,7 +37,7 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
 ; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
 ; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
 ; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
@@ -143,8 +143,8 @@ define <4 x i64> @testv4i64u(<4 x i64> %
 ; AVX1-LABEL: testv4i64u:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
@@ -169,7 +169,7 @@ define <4 x i64> @testv4i64u(<4 x i64> %
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
 ; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
 ; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
 ; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
@@ -275,8 +275,8 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX1-LABEL: testv8i32:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
@@ -296,7 +296,7 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
@@ -387,8 +387,8 @@ define <8 x i32> @testv8i32u(<8 x i32> %
 ; AVX1-LABEL: testv8i32u:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
@@ -408,7 +408,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %
 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
@@ -499,8 +499,8 @@ define <16 x i16> @testv16i16(<16 x i16>
 ; AVX1-LABEL: testv16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
@@ -515,7 +515,7 @@ define <16 x i16> @testv16i16(<16 x i16>
 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
@@ -586,8 +586,8 @@ define <16 x i16> @testv16i16u(<16 x i16
 ; AVX1-LABEL: testv16i16u:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
@@ -602,7 +602,7 @@ define <16 x i16> @testv16i16u(<16 x i16
 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
@@ -673,8 +673,8 @@ define <32 x i8> @testv32i8(<32 x i8> %i
 ; AVX1-LABEL: testv32i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
@@ -684,7 +684,7 @@ define <32 x i8> @testv32i8(<32 x i8> %i
 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
@@ -747,8 +747,8 @@ define <32 x i8> @testv32i8u(<32 x i8> %
 ; AVX1-LABEL: testv32i8u:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
@@ -758,7 +758,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %
 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll Fri Feb 10 23:32:57 2017
@@ -6,8 +6,8 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX1-LABEL: testv4i64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
@@ -16,7 +16,7 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsadbw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
@@ -47,8 +47,8 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX1-LABEL: testv8i32:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
@@ -61,7 +61,7 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT:    vpsadbw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
@@ -145,15 +145,15 @@ define <32 x i8> @testv32i8(<32 x i8> %i
 ; AVX1-LABEL: testv32i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT:    vandps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vandps %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Fri Feb 10 23:32:57 2017
@@ -621,22 +621,22 @@ define <16 x i8> @trunc_add_const_v16i64
 ; AVX1-LABEL: trunc_add_const_v16i64_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
@@ -717,13 +717,13 @@ define <16 x i8> @trunc_add_const_v16i32
 ; AVX1-LABEL: trunc_add_const_v16i32_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
@@ -3401,22 +3401,22 @@ define <16 x i8> @trunc_and_const_v16i64
 ; AVX1-LABEL: trunc_and_const_v16i64_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
@@ -3497,13 +3497,13 @@ define <16 x i8> @trunc_and_const_v16i32
 ; AVX1-LABEL: trunc_and_const_v16i32_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
@@ -4137,22 +4137,22 @@ define <16 x i8> @trunc_xor_const_v16i64
 ; AVX1-LABEL: trunc_xor_const_v16i64_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
@@ -4233,13 +4233,13 @@ define <16 x i8> @trunc_xor_const_v16i32
 ; AVX1-LABEL: trunc_xor_const_v16i32_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
@@ -4873,22 +4873,22 @@ define <16 x i8> @trunc_or_const_v16i64_
 ; AVX1-LABEL: trunc_or_const_v16i64_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandps %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandps %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
@@ -4969,13 +4969,13 @@ define <16 x i8> @trunc_or_const_v16i32_
 ; AVX1-LABEL: trunc_or_const_v16i32_v16i8:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
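
The only change in these hunks is the domain of the mask load and the ANDs: vmovaps/vandps become vmovdqa/vpand. Everything that consumes them (vpackuswb and the trailing vpaddb/vpand/vpxor/vpor against a memory constant) is already an integer instruction, so the whole AVX1 truncation sequence now stays in the integer domain. For context, these checks cover IR of roughly the following shape (a hand-written sketch, not the exact test body; the constant vector is made up for illustration):

  define <16 x i8> @trunc_add_const_v16i32_v16i8_sketch(<16 x i32> %a) {
    ; Truncate each 32-bit lane to 8 bits, then add a constant byte vector.
    %t = trunc <16 x i32> %a to <16 x i8>
    %r = add <16 x i8> %t, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
    ret <16 x i8> %r
  }

AVX1 has no 256-bit integer operations, so each ymm input is split with vextractf128, both halves are masked down to the low byte of each lane, and vpackuswb repeatedly narrows the result.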

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Fri Feb 10 23:32:57 2017
@@ -144,13 +144,13 @@ define void @trunc8i64_8i8(<8 x i64> %a)
 ; AVX1-LABEL: trunc8i64_8i8:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
@@ -435,13 +435,13 @@ define void @trunc16i32_16i8(<16 x i32>
 ; AVX1-LABEL: trunc16i32_16i8:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
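
The plain truncating stores get the same treatment: the byte mask (255,0,... selecting the low byte of each wide lane) and the ANDs move to vmovdqa/vpand so they sit in the same domain as the vpackuswb chain and the final vmovdqu store. A minimal sketch of the kind of IR trunc8i64_8i8 exercises (an approximation assuming a store through a pointer argument; the actual test may store elsewhere):

  define void @trunc8i64_8i8_sketch(<8 x i64> %a, <8 x i8>* %p) {
  entry:
    ; Keep the low byte of each 64-bit lane and store the 8 packed bytes.
    %t = trunc <8 x i64> %a to <8 x i8>
    store <8 x i8> %t, <8 x i8>* %p, align 1
    ret void
  }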

Modified: llvm/trunk/test/CodeGen/X86/viabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/viabs.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/viabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/viabs.ll Fri Feb 10 23:32:57 2017
@@ -611,8 +611,8 @@ define <8 x i64> @test_abs_le_v8i64_fold
 ;
 ; AVX1-LABEL: test_abs_le_v8i64_fold:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovups (%rdi), %ymm0
-; AVX1-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
+; AVX1-NEXT:    vmovdqu 32(%rdi), %ymm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm3
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
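
In this fold test the only difference is that the two unaligned 256-bit loads come in as vmovdqu rather than vmovups; their first consumers (vpsrad $31 and vpshufd, which broadcast each element's sign bit into a 64-bit mask) are integer instructions. A sketch of the compare-and-select absolute-value pattern such tests are typically written with (an assumption, not the exact body of test_abs_le_v8i64_fold):

  define <8 x i64> @abs_v8i64_sketch(<8 x i64>* %p) {
    ; abs(x) expressed as select(x > -1, x, 0 - x).
    %x = load <8 x i64>, <8 x i64>* %p, align 8
    %neg = sub <8 x i64> zeroinitializer, %x
    %pos = icmp sgt <8 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
    %abs = select <8 x i1> %pos, <8 x i64> %x, <8 x i64> %neg
    ret <8 x i64> %abs
  }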

Modified: llvm/trunk/test/CodeGen/X86/vselect-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-minmax.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-minmax.ll Fri Feb 10 23:32:57 2017
@@ -5418,22 +5418,22 @@ define <8 x i64> @test125(<8 x i64> %a,
 ; AVX1-LABEL: test125:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
 ; AVX1-NEXT:    vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
@@ -5589,26 +5589,26 @@ define <8 x i64> @test126(<8 x i64> %a,
 ; AVX1-LABEL: test126:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
 ; AVX1-NEXT:    vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -5746,22 +5746,22 @@ define <8 x i64> @test127(<8 x i64> %a,
 ; AVX1-LABEL: test127:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
 ; AVX1-NEXT:    vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
@@ -5918,26 +5918,26 @@ define <8 x i64> @test128(<8 x i64> %a,
 ; AVX1-LABEL: test128:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
 ; AVX1-NEXT:    vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -8134,22 +8134,22 @@ define <8 x i64> @test157(<8 x i64> %a,
 ; AVX1-LABEL: test157:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
 ; AVX1-NEXT:    vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
@@ -8302,26 +8302,26 @@ define <8 x i64> @test158(<8 x i64> %a,
 ; AVX1-LABEL: test158:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
 ; AVX1-NEXT:    vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -8460,22 +8460,22 @@ define <8 x i64> @test159(<8 x i64> %a,
 ; AVX1-LABEL: test159:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
 ; AVX1-NEXT:    vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
@@ -8629,26 +8629,26 @@ define <8 x i64> @test160(<8 x i64> %a,
 ; AVX1-LABEL: test160:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
 ; AVX1-NEXT:    vpxor %xmm8, %xmm4, %xmm4
-; AVX1-NEXT:    vxorps %xmm5, %xmm1, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm6
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
-; AVX1-NEXT:    vxorps %xmm5, %xmm0, %xmm7
-; AVX1-NEXT:    vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
@@ -9066,13 +9066,13 @@ define <4 x i64> @test165(<4 x i64> %a,
 ; AVX1-LABEL: test165:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -9164,15 +9164,15 @@ define <4 x i64> @test166(<4 x i64> %a,
 ; AVX1-LABEL: test166:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9258,13 +9258,13 @@ define <4 x i64> @test167(<4 x i64> %a,
 ; AVX1-LABEL: test167:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
@@ -9356,15 +9356,15 @@ define <4 x i64> @test168(<4 x i64> %a,
 ; AVX1-LABEL: test168:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9777,13 +9777,13 @@ define <4 x i64> @test173(<4 x i64> %a,
 ; AVX1-LABEL: test173:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
@@ -9874,15 +9874,15 @@ define <4 x i64> @test174(<4 x i64> %a,
 ; AVX1-LABEL: test174:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
@@ -9969,13 +9969,13 @@ define <4 x i64> @test175(<4 x i64> %a,
 ; AVX1-LABEL: test175:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
@@ -10066,15 +10066,15 @@ define <4 x i64> @test176(<4 x i64> %a,
 ; AVX1-LABEL: test176:
 ; AVX1:       # BB#0: # %entry
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm5
-; AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
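
The tests touched here (test125 through test176) are unsigned 64-bit min/max patterns. x86 has no unsigned quadword compare, so both operands are biased by 9223372036854775808 (2^63) and compared with the signed vpcmpgtq; with this patch those bias XORs become vpxor instead of vxorps, matching their integer consumer, while the final ymm blend remains vblendvpd. One of the unsigned variants looks roughly like this (a sketch with an assumed predicate, not copied from the test file):

  define <4 x i64> @umax_v4i64_sketch(<4 x i64> %a, <4 x i64> %b) {
  entry:
    ; Unsigned max via compare+select; lowered on AVX1 by flipping the
    ; sign bit of both sides and using the signed vpcmpgtq.
    %cmp = icmp ugt <4 x i64> %a, %b
    %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
    ret <4 x i64> %sel
  }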

Modified: llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll?rev=294824&r1=294823&r2=294824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll Fri Feb 10 23:32:57 2017
@@ -53,17 +53,29 @@ define <4 x double> @load_factorf64_2(<1
 }
 
 define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
-; AVX-LABEL: load_factorf64_1:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovupd (%rdi), %ymm0
-; AVX-NEXT:    vmovupd 32(%rdi), %ymm1
-; AVX-NEXT:    vmovupd 64(%rdi), %ymm2
-; AVX-NEXT:    vmovupd 96(%rdi), %ymm3
-; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX-NEXT:    vmulpd %ymm0, %ymm0, %ymm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: load_factorf64_1:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovups (%rdi), %ymm0
+; AVX1-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovups 64(%rdi), %ymm2
+; AVX1-NEXT:    vmovups 96(%rdi), %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT:    vmulpd %ymm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_factorf64_1:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovupd (%rdi), %ymm0
+; AVX2-NEXT:    vmovupd 32(%rdi), %ymm1
+; AVX2-NEXT:    vmovupd 64(%rdi), %ymm2
+; AVX2-NEXT:    vmovupd 96(%rdi), %ymm3
+; AVX2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT:    vmulpd %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
   %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
   %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
   %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
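
Unlike the other files, load_factorf64_1 previously used a single AVX check block shared by both subtargets; it has to be split because AVX1 now selects vmovups for the unaligned loads while AVX2 keeps vmovupd, even though the rest of the sequence (vinsertf128, vunpcklpd, vmulpd) is unchanged. The split assumes RUN lines that define both a shared AVX prefix and per-subtarget AVX1/AVX2 prefixes, something like the following (illustrative only; the triple and exact flags are assumptions, not copied from the test):

  ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx  | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
  ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2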



