[llvm] [X86] Add baseline andnot tests for #172329 (PR #184991)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 03:32:31 PST 2026


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/184991

(No description was provided for this pull request.)

>From 2f4168fa9df1d4af506cbeaaec599f5559011a2a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 6 Mar 2026 11:07:36 +0000
Subject: [PATCH] [X86] Add baseline andnot tests for #172329

---
 llvm/test/CodeGen/X86/andnot-sink-not.ll | 3149 ++++++++++++++++++++++
 1 file changed, 3149 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/andnot-sink-not.ll

diff --git a/llvm/test/CodeGen/X86/andnot-sink-not.ll b/llvm/test/CodeGen/X86/andnot-sink-not.ll
new file mode 100644
index 0000000000000..4d6aa02c0fe72
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andnot-sink-not.ll
@@ -0,0 +1,3149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi | FileCheck %s --check-prefixes=X86-NOBMI,X86
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86-NOBMI,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -mattr=+bmi | FileCheck %s --check-prefixes=X86-BMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOAVX2,X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI,X64-NOAVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi,+avx2 | FileCheck %s --check-prefixes=X64,X64-BMI,X64-AVX2
+
+define i8 @and_sink_not_i8(i8 %x, i8 %m, i1 zeroext %cond) { ; returns (~%m & %x) when %cond is true, otherwise %x
+; X86-NOBMI-LABEL: and_sink_not_i8:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB0_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notb %cl
+; X86-NOBMI-NEXT:    andb %al, %cl
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB0_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i8:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB0_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notb %cl
+; X86-BMI-NEXT:    andb %al, %cl
+; X86-BMI-NEXT:    movl %ecx, %eax
+; X86-BMI-NEXT:  .LBB0_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notb %sil
+; X64-NEXT:    andb %dil, %sil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB0_2: # %identity
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  %a = xor i8 %m, -1 ; bitwise NOT of %m, written in the entry block ahead of the branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i8 %a, %x ; only use of %a; the inverted value is the first operand; checks above show notb then andb inside %mask for every run
+  ret i8 %masked
+
+identity:
+  ret i8 %x ; %a is not used on this path
+}
+
+define i8 @and_sink_not_i8_swapped(i8 %x, i8 %m, i1 zeroext %cond) { ; same shape as and_sink_not_i8 with the operands of the and commuted
+; X86-NOBMI-LABEL: and_sink_not_i8_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB1_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notb %cl
+; X86-NOBMI-NEXT:    andb %cl, %al
+; X86-NOBMI-NEXT:  .LBB1_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i8_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB1_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notb %cl
+; X86-BMI-NEXT:    andb %cl, %al
+; X86-BMI-NEXT:  .LBB1_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i8_swapped:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB1_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notb %sil
+; X64-NEXT:    andb %sil, %al
+; X64-NEXT:  .LBB1_2: # %identity
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = xor i8 %m, -1 ; ~%m, defined before the conditional branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i8 %x, %a ; %x first, inverted mask second; the checks and directly into the register already holding %x
+  ret i8 %masked
+
+identity:
+  ret i8 %x ; pass-through path, no masking
+}
+
+define i16 @and_sink_not_i16(i16 %x, i16 %m, i1 zeroext %cond) { ; 16-bit case, yields (~%m & %x) if %cond is set, else %x
+; X86-NOBMI-LABEL: and_sink_not_i16:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB2_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    retl
+; X86-NOBMI-NEXT:  .LBB2_2: # %identity
+; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i16:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB2_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %eax, %ecx
+; X86-BMI-NEXT:    movl %ecx, %eax
+; X86-BMI-NEXT:    retl
+; X86-BMI-NEXT:  .LBB2_2: # %identity
+; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB2_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB2_2: # %identity
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  %a = xor i16 %m, -1 ; bitwise NOT of %m, placed in the entry block
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i16 %a, %x ; sole user of %a; the checks perform this with 32-bit notl/andl, including the +bmi runs
+  ret i16 %masked
+
+identity:
+  ret i16 %x ; unmasked value returned as-is
+}
+
+define i16 @and_sink_not_i16_swapped(i16 %x, i16 %m, i1 zeroext %cond) { ; commuted-operand twin of and_sink_not_i16
+; X86-NOBMI-LABEL: and_sink_not_i16_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB3_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB3_2: # %identity
+; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i16_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB3_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %ecx, %eax
+; X86-BMI-NEXT:  .LBB3_2: # %identity
+; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i16_swapped:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB3_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notl %esi
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:  .LBB3_2: # %identity
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = xor i16 %m, -1 ; ~%m, emitted before the branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i16 %x, %a ; %x is the first operand here; checks show notl followed by andl into the result register
+  ret i16 %masked
+
+identity:
+  ret i16 %x ; %cond false, so %x is returned untouched
+}
+
+define i32 @and_sink_not_i32(i32 %x, i32 %m, i1 zeroext %cond) { ; 32-bit case, (~%m & %x) under %cond, plain %x otherwise
+; X86-NOBMI-LABEL: and_sink_not_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB4_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB4_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB4_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %eax, %ecx
+; X86-BMI-NEXT:    movl %ecx, %eax
+; X86-BMI-NEXT:  .LBB4_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB4_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB4_2: # %identity
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  %a = xor i32 %m, -1 ; bitwise NOT of %m, defined in the entry block
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i32 %a, %x ; only consumer of %a; the +bmi runs are checked with the same notl + andl pair as the -bmi runs (baseline, presumably to become andn once #172329 is addressed - confirm)
+  ret i32 %masked
+
+identity:
+  ret i32 %x ; no use of %a on this path
+}
+
+define i32 @and_sink_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) { ; and_sink_not_i32 with the and operands in the opposite order
+; X86-NOBMI-LABEL: and_sink_not_i32_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB5_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:  .LBB5_2: # %identity
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i32_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB5_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %ecx, %eax
+; X86-BMI-NEXT:  .LBB5_2: # %identity
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i32_swapped:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB5_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notl %esi
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:  .LBB5_2: # %identity
+; X64-NEXT:    retq
+  %a = xor i32 %m, -1 ; ~%m, computed ahead of the branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i32 %x, %a ; inverted mask is the second operand; checks show notl then andl for both -bmi and +bmi runs
+  ret i32 %masked
+
+identity:
+  ret i32 %x ; returns the input unchanged
+}
+
+define i64 @and_sink_not_i64(i64 %x, i64 %m, i1 zeroext %cond) nounwind { ; 64-bit case, (~%m & %x) when %cond is true, else %x
+; X86-NOBMI-LABEL: and_sink_not_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB6_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    notl %esi
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %eax, %ecx
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:  .LBB6_2: # %identity
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB6_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %eax, %ecx
+; X86-BMI-NEXT:    andl %edx, %esi
+; X86-BMI-NEXT:    movl %ecx, %eax
+; X86-BMI-NEXT:    movl %esi, %edx
+; X86-BMI-NEXT:  .LBB6_2: # %identity
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB6_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notq %rsi
+; X64-NEXT:    andq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB6_2: # %identity
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    retq
+  %a = xor i64 %m, -1 ; bitwise NOT of %m, written in the entry block
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i64 %a, %x ; only use of %a; the i686 checks handle it as two 32-bit halves (two notl, two andl), the 64-bit checks as notq + andq
+  ret i64 %masked
+
+identity:
+  ret i64 %x ; %x returned without masking
+}
+
+define i64 @and_sink_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) nounwind { ; commuted-operand twin of and_sink_not_i64
+; X86-NOBMI-LABEL: and_sink_not_i64_swapped:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    je .LBB7_2
+; X86-NOBMI-NEXT:  # %bb.1: # %mask
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    notl %esi
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:    andl %esi, %edx
+; X86-NOBMI-NEXT:  .LBB7_2: # %identity
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_i64_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB7_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %ecx, %eax
+; X86-BMI-NEXT:    andl %esi, %edx
+; X86-BMI-NEXT:  .LBB7_2: # %identity
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-LABEL: and_sink_not_i64_swapped:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    je .LBB7_2
+; X64-NEXT:  # %bb.1: # %mask
+; X64-NEXT:    notq %rsi
+; X64-NEXT:    andq %rsi, %rax
+; X64-NEXT:  .LBB7_2: # %identity
+; X64-NEXT:    retq
+  %a = xor i64 %m, -1 ; ~%m, defined before the conditional branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and i64 %x, %a ; %x first, inverted mask second; i686 checks and each 32-bit half directly into the return registers
+  ret i64 %masked
+
+identity:
+  ret i64 %x ; pass-through path
+}
+
+define <8 x i8> @and_sink_not_v8i8(<8 x i8> %x, <8 x i8> %m, i1 zeroext %cond) nounwind { ; vector case, lane-wise (~%m & %x) when %cond is true, else %x
+; X86-LABEL: and_sink_not_v8i8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB8_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    notb %dh
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %dl, %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    notb %dl
+; X86-NEXT:    andb %cl, %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    notb %cl
+; X86-NEXT:    andb %bh, %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    notb %bh
+; X86-NEXT:    andb %bl, %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    notb %bl
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %bl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    notb %al
+; X86-NEXT:    andb %ah, %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    notb %ah
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb %ah, (%esi)
+; X86-NEXT:    movb %al, 1(%esi)
+; X86-NEXT:    movb %bl, 2(%esi)
+; X86-NEXT:    movb %bh, 3(%esi)
+; X86-NEXT:    movb %cl, 4(%esi)
+; X86-NEXT:    movb %dl, 5(%esi)
+; X86-NEXT:    movb %ch, 6(%esi)
+; X86-NEXT:    movb %dh, 7(%esi)
+; X86-NEXT:    jmp .LBB8_3
+; X86-NEXT:  .LBB8_2: # %identity
+; X86-NEXT:    movb %al, (%esi)
+; X86-NEXT:    movb %ah, 1(%esi)
+; X86-NEXT:    movb %dh, 2(%esi)
+; X86-NEXT:    movb %bl, 3(%esi)
+; X86-NEXT:    movb %bh, 4(%esi)
+; X86-NEXT:    movb %cl, 5(%esi)
+; X86-NEXT:    movb %dl, 6(%esi)
+; X86-NEXT:    movb %ch, 7(%esi)
+; X86-NEXT:  .LBB8_3: # %identity
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB8_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    notb %dh
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %dl, %ch
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-SSE-NEXT:    notb %dl
+; X86-SSE-NEXT:    andb %cl, %dl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    notb %cl
+; X86-SSE-NEXT:    andb %bh, %cl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    notb %bh
+; X86-SSE-NEXT:    andb %bl, %bh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-SSE-NEXT:    notb %bl
+; X86-SSE-NEXT:    andb {{[0-9]+}}(%esp), %bl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-SSE-NEXT:    notb %al
+; X86-SSE-NEXT:    andb %ah, %al
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-SSE-NEXT:    notb %ah
+; X86-SSE-NEXT:    andb {{[0-9]+}}(%esp), %ah
+; X86-SSE-NEXT:    movb %ah, (%esi)
+; X86-SSE-NEXT:    movb %al, 1(%esi)
+; X86-SSE-NEXT:    movb %bl, 2(%esi)
+; X86-SSE-NEXT:    movb %bh, 3(%esi)
+; X86-SSE-NEXT:    movb %cl, 4(%esi)
+; X86-SSE-NEXT:    movb %dl, 5(%esi)
+; X86-SSE-NEXT:    movb %ch, 6(%esi)
+; X86-SSE-NEXT:    movb %dh, 7(%esi)
+; X86-SSE-NEXT:    jmp .LBB8_3
+; X86-SSE-NEXT:  .LBB8_2: # %identity
+; X86-SSE-NEXT:    movb %al, (%esi)
+; X86-SSE-NEXT:    movb %ah, 1(%esi)
+; X86-SSE-NEXT:    movb %dh, 2(%esi)
+; X86-SSE-NEXT:    movb %bl, 3(%esi)
+; X86-SSE-NEXT:    movb %bh, 4(%esi)
+; X86-SSE-NEXT:    movb %cl, 5(%esi)
+; X86-SSE-NEXT:    movb %dl, 6(%esi)
+; X86-SSE-NEXT:    movb %ch, 7(%esi)
+; X86-SSE-NEXT:  .LBB8_3: # %identity
+; X86-SSE-NEXT:    movl %esi, %eax
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v8i8:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB8_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
+; X86-SSE2-NEXT:    pand %xmm0, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB8_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v8i8:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB8_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    notb %dh
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %dl, %ch
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI-NEXT:    notb %dl
+; X86-BMI-NEXT:    andb %cl, %dl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI-NEXT:    notb %cl
+; X86-BMI-NEXT:    andb %bh, %cl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-BMI-NEXT:    notb %bh
+; X86-BMI-NEXT:    andb %bl, %bh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI-NEXT:    notb %bl
+; X86-BMI-NEXT:    andb {{[0-9]+}}(%esp), %bl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI-NEXT:    notb %al
+; X86-BMI-NEXT:    andb %ah, %al
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-BMI-NEXT:    notb %ah
+; X86-BMI-NEXT:    andb {{[0-9]+}}(%esp), %ah
+; X86-BMI-NEXT:    movb %ah, (%esi)
+; X86-BMI-NEXT:    movb %al, 1(%esi)
+; X86-BMI-NEXT:    movb %bl, 2(%esi)
+; X86-BMI-NEXT:    movb %bh, 3(%esi)
+; X86-BMI-NEXT:    movb %cl, 4(%esi)
+; X86-BMI-NEXT:    movb %dl, 5(%esi)
+; X86-BMI-NEXT:    movb %ch, 6(%esi)
+; X86-BMI-NEXT:    movb %dh, 7(%esi)
+; X86-BMI-NEXT:    jmp .LBB8_3
+; X86-BMI-NEXT:  .LBB8_2: # %identity
+; X86-BMI-NEXT:    movb %al, (%esi)
+; X86-BMI-NEXT:    movb %ah, 1(%esi)
+; X86-BMI-NEXT:    movb %dh, 2(%esi)
+; X86-BMI-NEXT:    movb %bl, 3(%esi)
+; X86-BMI-NEXT:    movb %bh, 4(%esi)
+; X86-BMI-NEXT:    movb %cl, 5(%esi)
+; X86-BMI-NEXT:    movb %dl, 6(%esi)
+; X86-BMI-NEXT:    movb %ch, 7(%esi)
+; X86-BMI-NEXT:  .LBB8_3: # %identity
+; X86-BMI-NEXT:    movl %esi, %eax
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v8i8:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB8_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-NOAVX2-NEXT:    pxor %xmm2, %xmm1
+; X64-NOAVX2-NEXT:    pand %xmm0, %xmm1
+; X64-NOAVX2-NEXT:    movdqa %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB8_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v8i8:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB8_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT:  .LBB8_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <8 x i8> %m, splat (i8 -1) ; lane-wise bitwise NOT of %m, computed in the entry block
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <8 x i8> %a, %x ; only use of %a; checked as byte-wise notb/andb on the i686 runs other than +sse2, and as an all-ones xor followed by pand/vpand on the remaining runs
+  ret <8 x i8> %masked
+
+identity:
+  ret <8 x i8> %x ; unmasked vector returned as-is
+}
+
+define <8 x i8> @and_sink_not_v8i8_swapped(<8 x i8> %x, <8 x i8> %m, i1 zeroext %cond) nounwind { ; and_sink_not_v8i8 with the operands of the and commuted
+; X86-LABEL: and_sink_not_v8i8_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB9_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movb %ch, %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    andb %ch, %ah
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %bl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %bh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb %dh, (%esi)
+; X86-NEXT:    movb %dl, 1(%esi)
+; X86-NEXT:    movb %cl, 2(%esi)
+; X86-NEXT:    movb %bh, 3(%esi)
+; X86-NEXT:    movb %bl, 4(%esi)
+; X86-NEXT:    movb %al, 5(%esi)
+; X86-NEXT:    movb %ah, 6(%esi)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 7(%esi)
+; X86-NEXT:    jmp .LBB9_3
+; X86-NEXT:  .LBB9_2: # %identity
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb %dh, (%esi)
+; X86-NEXT:    movb %dl, 1(%esi)
+; X86-NEXT:    movb %cl, 2(%esi)
+; X86-NEXT:    movb %bh, 3(%esi)
+; X86-NEXT:    movb %bl, 4(%esi)
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb %cl, 5(%esi)
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb %cl, 6(%esi)
+; X86-NEXT:    movb %ch, 7(%esi)
+; X86-NEXT:  .LBB9_3: # %identity
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v8i8_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB9_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movb %ch, %dh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-SSE-NEXT:    andb %ch, %ah
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %al
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %bl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %bh
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %cl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %dl
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb %dh, (%esi)
+; X86-SSE-NEXT:    movb %dl, 1(%esi)
+; X86-SSE-NEXT:    movb %cl, 2(%esi)
+; X86-SSE-NEXT:    movb %bh, 3(%esi)
+; X86-SSE-NEXT:    movb %bl, 4(%esi)
+; X86-SSE-NEXT:    movb %al, 5(%esi)
+; X86-SSE-NEXT:    movb %ah, 6(%esi)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 7(%esi)
+; X86-SSE-NEXT:    jmp .LBB9_3
+; X86-SSE-NEXT:  .LBB9_2: # %identity
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movb %dh, (%esi)
+; X86-SSE-NEXT:    movb %dl, 1(%esi)
+; X86-SSE-NEXT:    movb %cl, 2(%esi)
+; X86-SSE-NEXT:    movb %bh, 3(%esi)
+; X86-SSE-NEXT:    movb %bl, 4(%esi)
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    movb %cl, 5(%esi)
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE-NEXT:    movb %cl, 6(%esi)
+; X86-SSE-NEXT:    movb %ch, 7(%esi)
+; X86-SSE-NEXT:  .LBB9_3: # %identity
+; X86-SSE-NEXT:    movl %esi, %eax
+; X86-SSE-NEXT:    addl $4, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v8i8_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB9_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB9_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v8i8_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    pushl %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB9_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movb %ch, %dh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-BMI-NEXT:    andb %ch, %ah
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %al
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %bl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %bh
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %cl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %dl
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    movb %dh, (%esi)
+; X86-BMI-NEXT:    movb %dl, 1(%esi)
+; X86-BMI-NEXT:    movb %cl, 2(%esi)
+; X86-BMI-NEXT:    movb %bh, 3(%esi)
+; X86-BMI-NEXT:    movb %bl, 4(%esi)
+; X86-BMI-NEXT:    movb %al, 5(%esi)
+; X86-BMI-NEXT:    movb %ah, 6(%esi)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 7(%esi)
+; X86-BMI-NEXT:    jmp .LBB9_3
+; X86-BMI-NEXT:  .LBB9_2: # %identity
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    movb %dh, (%esi)
+; X86-BMI-NEXT:    movb %dl, 1(%esi)
+; X86-BMI-NEXT:    movb %cl, 2(%esi)
+; X86-BMI-NEXT:    movb %bh, 3(%esi)
+; X86-BMI-NEXT:    movb %bl, 4(%esi)
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI-NEXT:    movb %cl, 5(%esi)
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI-NEXT:    movb %cl, 6(%esi)
+; X86-BMI-NEXT:    movb %ch, 7(%esi)
+; X86-BMI-NEXT:  .LBB9_3: # %identity
+; X86-BMI-NEXT:    movl %esi, %eax
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v8i8_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB9_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-NOAVX2-NEXT:    pxor %xmm2, %xmm1
+; X64-NOAVX2-NEXT:    pand %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB9_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v8i8_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB9_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:  .LBB9_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <8 x i8> %m, splat (i8 -1) ; lane-wise ~%m, defined ahead of the branch
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <8 x i8> %x, %a ; %x first, inverted mask second; the byte-wise i686 checks need a 1-byte spill/reload in this block, the vector runs use an all-ones xor then pand/vpand
+  ret <8 x i8> %masked
+
+identity:
+  ret <8 x i8> %x ; pass-through path, %a unused
+}
+
+define <4 x i32> @and_sink_not_v4i32(<4 x i32> %x, <4 x i32> %m, i1 zeroext %cond) nounwind { ; returns %x & ~%m when %cond is true, otherwise %x; the 'not' is defined above the branch
+; X86-LABEL: and_sink_not_v4i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB10_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    notl %edi
+; X86-NEXT:    andl %ebx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andl %ecx, %edx
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    jmp .LBB10_3
+; X86-NEXT:  .LBB10_2: # %identity
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:  .LBB10_3: # %identity
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    movl %esp, %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    andl $-16, %esp
+; X86-SSE-NEXT:    subl $64, %esp
+; X86-SSE-NEXT:    movl 8(%ebp), %eax
+; X86-SSE-NEXT:    movl 24(%ebp), %ecx
+; X86-SSE-NEXT:    movl 20(%ebp), %edx
+; X86-SSE-NEXT:    movl 16(%ebp), %esi
+; X86-SSE-NEXT:    movzbl 44(%ebp), %ebx
+; X86-SSE-NEXT:    testb %bl, %bl
+; X86-SSE-NEXT:    movl 12(%ebp), %edi
+; X86-SSE-NEXT:    movups 28(%ebp), %xmm0
+; X86-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB10_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl 16(%ebp), %edi
+; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl 20(%ebp), %edi
+; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl 24(%ebp), %edi
+; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %edx, (%esp)
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    andps %xmm0, %xmm1
+; X86-SSE-NEXT:    movaps %xmm1, (%eax)
+; X86-SSE-NEXT:    jmp .LBB10_3
+; X86-SSE-NEXT:  .LBB10_2: # %identity
+; X86-SSE-NEXT:    movl %edi, (%eax)
+; X86-SSE-NEXT:    movl %esi, 4(%eax)
+; X86-SSE-NEXT:    movl %edx, 8(%eax)
+; X86-SSE-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE-NEXT:  .LBB10_3: # %identity
+; X86-SSE-NEXT:    leal -12(%ebp), %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v4i32:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB10_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
+; X86-SSE2-NEXT:    pand %xmm0, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB10_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v4i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB10_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    notl %edi
+; X86-BMI-NEXT:    andl %ebx, %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %esi, %ebx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %edx, %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    notl %edx
+; X86-BMI-NEXT:    andl %ecx, %edx
+; X86-BMI-NEXT:    movl %edx, (%eax)
+; X86-BMI-NEXT:    movl %esi, 4(%eax)
+; X86-BMI-NEXT:    movl %ebx, 8(%eax)
+; X86-BMI-NEXT:    movl %edi, 12(%eax)
+; X86-BMI-NEXT:    jmp .LBB10_3
+; X86-BMI-NEXT:  .LBB10_2: # %identity
+; X86-BMI-NEXT:    movl %ecx, (%eax)
+; X86-BMI-NEXT:    movl %edx, 4(%eax)
+; X86-BMI-NEXT:    movl %esi, 8(%eax)
+; X86-BMI-NEXT:    movl %ebx, 12(%eax)
+; X86-BMI-NEXT:  .LBB10_3: # %identity
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v4i32:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB10_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-NOAVX2-NEXT:    pxor %xmm2, %xmm1
+; X64-NOAVX2-NEXT:    pand %xmm0, %xmm1
+; X64-NOAVX2-NEXT:    movdqa %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB10_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v4i32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB10_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT:  .LBB10_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <4 x i32> %m, splat (i32 -1) ; %a = ~%m (xor with all-ones), defined in the entry block ahead of the branch
+  br i1 %cond, label %mask, label %identity ; only the %mask successor reads %a
+
+mask:
+  %masked = and <4 x i32> %a, %x ; inverted mask is the first 'and' operand here; the _swapped variant puts it second
+  ret <4 x i32> %masked
+
+identity:
+  ret <4 x i32> %x ; %x is returned unmodified on this path
+}
+
+define <4 x i32> @and_sink_not_v4i32_swapped(<4 x i32> %x, <4 x i32> %m, i1 zeroext %cond) nounwind { ; same shape as and_sink_not_v4i32, but with the 'and' operands commuted
+; X86-LABEL: and_sink_not_v4i32_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB11_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %edi
+; X86-NEXT:  .LBB11_2: # %identity
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v4i32_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    movl %esp, %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    andl $-16, %esp
+; X86-SSE-NEXT:    subl $64, %esp
+; X86-SSE-NEXT:    movl 8(%ebp), %eax
+; X86-SSE-NEXT:    movl 24(%ebp), %ecx
+; X86-SSE-NEXT:    movl 20(%ebp), %edx
+; X86-SSE-NEXT:    movl 16(%ebp), %esi
+; X86-SSE-NEXT:    movzbl 44(%ebp), %ebx
+; X86-SSE-NEXT:    testb %bl, %bl
+; X86-SSE-NEXT:    movl 12(%ebp), %edi
+; X86-SSE-NEXT:    movups 28(%ebp), %xmm0
+; X86-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB11_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %edi, (%esp)
+; X86-SSE-NEXT:    movl 16(%ebp), %ecx
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl 20(%ebp), %ecx
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl 24(%ebp), %ecx
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    andps %xmm0, %xmm1
+; X86-SSE-NEXT:    movaps %xmm1, (%eax)
+; X86-SSE-NEXT:    jmp .LBB11_3
+; X86-SSE-NEXT:  .LBB11_2: # %identity
+; X86-SSE-NEXT:    movl %edi, (%eax)
+; X86-SSE-NEXT:    movl %esi, 4(%eax)
+; X86-SSE-NEXT:    movl %edx, 8(%eax)
+; X86-SSE-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE-NEXT:  .LBB11_3: # %identity
+; X86-SSE-NEXT:    leal -12(%ebp), %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v4i32_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB11_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB11_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v4i32_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB11_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %edi
+; X86-BMI-NEXT:  .LBB11_2: # %identity
+; X86-BMI-NEXT:    movl %edi, (%eax)
+; X86-BMI-NEXT:    movl %esi, 4(%eax)
+; X86-BMI-NEXT:    movl %edx, 8(%eax)
+; X86-BMI-NEXT:    movl %ecx, 12(%eax)
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v4i32_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB11_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-NOAVX2-NEXT:    pxor %xmm2, %xmm1
+; X64-NOAVX2-NEXT:    pand %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB11_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v4i32_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB11_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:  .LBB11_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <4 x i32> %m, splat (i32 -1) ; %a = ~%m (xor with all-ones), defined in the entry block ahead of the branch
+  br i1 %cond, label %mask, label %identity ; only the %mask successor reads %a
+
+mask:
+  %masked = and <4 x i32> %x, %a ; inverted mask is the second 'and' operand (the "swapped" order)
+  ret <4 x i32> %masked
+
+identity:
+  ret <4 x i32> %x ; %x is returned unmodified on this path
+}
+
+define <4 x i64> @and_sink_not_v4i64(<4 x i64> %x, <4 x i64> %m, i1 zeroext %cond) nounwind { ; returns %x & ~%m when %cond is true, otherwise %x; <4 x i64> form with the 'not' defined above the branch
+; X86-LABEL: and_sink_not_v4i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    je .LBB12_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    notl %edi
+; X86-NEXT:    andl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %ecx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andl %ebp, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    notl %ebp
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    notl %edi
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, (%edx)
+; X86-NEXT:    movl %eax, 4(%edx)
+; X86-NEXT:    movl %edi, 8(%edx)
+; X86-NEXT:    movl %ebp, 12(%edx)
+; X86-NEXT:    movl %ecx, 16(%edx)
+; X86-NEXT:    movl %esi, 20(%edx)
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 24(%edx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 28(%edx)
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    jmp .LBB12_3
+; X86-NEXT:  .LBB12_2: # %identity
+; X86-NEXT:    movl %ebx, (%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ebp, 16(%eax)
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl %edx, 24(%eax)
+; X86-NEXT:    movl %esi, 28(%eax)
+; X86-NEXT:  .LBB12_3: # %identity
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v4i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $8, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    je .LBB12_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    notl %edi
+; X86-SSE-NEXT:    andl %esi, %edi
+; X86-SSE-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %edx, %esi
+; X86-SSE-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %ecx, %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    notl %ecx
+; X86-SSE-NEXT:    andl %ebp, %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    notl %ebp
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    notl %edi
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    movl %eax, %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    notl %eax
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    notl %ebx
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl %ebx, (%edx)
+; X86-SSE-NEXT:    movl %eax, 4(%edx)
+; X86-SSE-NEXT:    movl %edi, 8(%edx)
+; X86-SSE-NEXT:    movl %ebp, 12(%edx)
+; X86-SSE-NEXT:    movl %ecx, 16(%edx)
+; X86-SSE-NEXT:    movl %esi, 20(%edx)
+; X86-SSE-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-SSE-NEXT:    movl %eax, 24(%edx)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SSE-NEXT:    movl %eax, 28(%edx)
+; X86-SSE-NEXT:    movl %edx, %eax
+; X86-SSE-NEXT:    jmp .LBB12_3
+; X86-SSE-NEXT:  .LBB12_2: # %identity
+; X86-SSE-NEXT:    movl %ebx, (%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl %ebx, 4(%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl %ebx, 8(%eax)
+; X86-SSE-NEXT:    movl %edi, 12(%eax)
+; X86-SSE-NEXT:    movl %ebp, 16(%eax)
+; X86-SSE-NEXT:    movl %ecx, 20(%eax)
+; X86-SSE-NEXT:    movl %edx, 24(%eax)
+; X86-SSE-NEXT:    movl %esi, 28(%eax)
+; X86-SSE-NEXT:  .LBB12_3: # %identity
+; X86-SSE-NEXT:    addl $8, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v4i64:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    cmpb $0, 24(%ebp)
+; X86-SSE2-NEXT:    je .LBB12_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; X86-SSE2-NEXT:    pxor %xmm3, %xmm2
+; X86-SSE2-NEXT:    pxor 8(%ebp), %xmm3
+; X86-SSE2-NEXT:    pand %xmm0, %xmm2
+; X86-SSE2-NEXT:    pand %xmm1, %xmm3
+; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
+; X86-SSE2-NEXT:  .LBB12_2: # %identity
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v4i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebp
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    subl $8, %esp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    je .LBB12_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    notl %edi
+; X86-BMI-NEXT:    andl %esi, %edi
+; X86-BMI-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %edx, %esi
+; X86-BMI-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %ecx, %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %ebp, %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    notl %ebp
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    notl %edi
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    notl %eax
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl %ebx, (%edx)
+; X86-BMI-NEXT:    movl %eax, 4(%edx)
+; X86-BMI-NEXT:    movl %edi, 8(%edx)
+; X86-BMI-NEXT:    movl %ebp, 12(%edx)
+; X86-BMI-NEXT:    movl %ecx, 16(%edx)
+; X86-BMI-NEXT:    movl %esi, 20(%edx)
+; X86-BMI-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-BMI-NEXT:    movl %eax, 24(%edx)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-BMI-NEXT:    movl %eax, 28(%edx)
+; X86-BMI-NEXT:    movl %edx, %eax
+; X86-BMI-NEXT:    jmp .LBB12_3
+; X86-BMI-NEXT:  .LBB12_2: # %identity
+; X86-BMI-NEXT:    movl %ebx, (%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl %ebx, 4(%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl %ebx, 8(%eax)
+; X86-BMI-NEXT:    movl %edi, 12(%eax)
+; X86-BMI-NEXT:    movl %ebp, 16(%eax)
+; X86-BMI-NEXT:    movl %ecx, 20(%eax)
+; X86-BMI-NEXT:    movl %edx, 24(%eax)
+; X86-BMI-NEXT:    movl %esi, 28(%eax)
+; X86-BMI-NEXT:  .LBB12_3: # %identity
+; X86-BMI-NEXT:    addl $8, %esp
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    popl %ebp
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v4i64:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB12_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm4, %xmm4
+; X64-NOAVX2-NEXT:    pxor %xmm4, %xmm3
+; X64-NOAVX2-NEXT:    pxor %xmm4, %xmm2
+; X64-NOAVX2-NEXT:    pand %xmm0, %xmm2
+; X64-NOAVX2-NEXT:    pand %xmm1, %xmm3
+; X64-NOAVX2-NEXT:    movdqa %xmm2, %xmm0
+; X64-NOAVX2-NEXT:    movdqa %xmm3, %xmm1
+; X64-NOAVX2-NEXT:  .LBB12_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v4i64:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB12_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT:  .LBB12_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <4 x i64> %m, splat (i64 -1) ; %a = ~%m (xor with all-ones), defined in the entry block ahead of the branch
+  br i1 %cond, label %mask, label %identity ; only the %mask successor reads %a
+
+mask:
+  %masked = and <4 x i64> %a, %x ; inverted mask is the first 'and' operand here; the _swapped variant puts it second
+  ret <4 x i64> %masked
+
+identity:
+  ret <4 x i64> %x ; %x is returned unmodified on this path
+}
+
+define <4 x i64> @and_sink_not_v4i64_swapped(<4 x i64> %x, <4 x i64> %m, i1 zeroext %cond) nounwind {
+; X86-LABEL: and_sink_not_v4i64_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB13_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    movl %edi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    andl %esi, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, %eax
+; X86-NEXT:    movl %eax, (%ebx)
+; X86-NEXT:    movl %edx, 4(%ebx)
+; X86-NEXT:    movl %ecx, 8(%ebx)
+; X86-NEXT:    movl %ebp, 12(%ebx)
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 16(%ebx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 20(%ebx)
+; X86-NEXT:    movl %edi, 24(%ebx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 28(%ebx)
+; X86-NEXT:    jmp .LBB13_3
+; X86-NEXT:  .LBB13_2: # %identity
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, (%ebx)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, 4(%ebx)
+; X86-NEXT:    movl %ecx, 8(%ebx)
+; X86-NEXT:    movl %ebp, 12(%ebx)
+; X86-NEXT:    movl %eax, 16(%ebx)
+; X86-NEXT:    movl %edi, 20(%ebx)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, 24(%ebx)
+; X86-NEXT:    movl %esi, 28(%ebx)
+; X86-NEXT:  .LBB13_3: # %identity
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_v4i64_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB13_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl %esi, %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %edx
+; X86-SSE-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    movl %edi, %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    andl %esi, %edi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %edx
+; X86-SSE-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %eax
+; X86-SSE-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %ebp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    andl %esi, %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, %eax
+; X86-SSE-NEXT:    movl %eax, (%ebx)
+; X86-SSE-NEXT:    movl %edx, 4(%ebx)
+; X86-SSE-NEXT:    movl %ecx, 8(%ebx)
+; X86-SSE-NEXT:    movl %ebp, 12(%ebx)
+; X86-SSE-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-SSE-NEXT:    movl %eax, 16(%ebx)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SSE-NEXT:    movl %eax, 20(%ebx)
+; X86-SSE-NEXT:    movl %edi, 24(%ebx)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SSE-NEXT:    movl %eax, 28(%ebx)
+; X86-SSE-NEXT:    jmp .LBB13_3
+; X86-SSE-NEXT:  .LBB13_2: # %identity
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl %edx, (%ebx)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl %edx, 4(%ebx)
+; X86-SSE-NEXT:    movl %ecx, 8(%ebx)
+; X86-SSE-NEXT:    movl %ebp, 12(%ebx)
+; X86-SSE-NEXT:    movl %eax, 16(%ebx)
+; X86-SSE-NEXT:    movl %edi, 20(%ebx)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, 24(%ebx)
+; X86-SSE-NEXT:    movl %esi, 28(%ebx)
+; X86-SSE-NEXT:  .LBB13_3: # %identity
+; X86-SSE-NEXT:    movl %ebx, %eax
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_v4i64_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    cmpb $0, 24(%ebp)
+; X86-SSE2-NEXT:    je .LBB13_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; X86-SSE2-NEXT:    pxor %xmm3, %xmm2
+; X86-SSE2-NEXT:    pxor 8(%ebp), %xmm3
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pand %xmm3, %xmm1
+; X86-SSE2-NEXT:  .LBB13_2: # %identity
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_v4i64_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebp
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    subl $12, %esp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB13_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl %esi, %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %edx
+; X86-BMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    movl %edi, %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    andl %esi, %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %edx
+; X86-BMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %eax
+; X86-BMI-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %ebp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    andl %esi, %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, %eax
+; X86-BMI-NEXT:    movl %eax, (%ebx)
+; X86-BMI-NEXT:    movl %edx, 4(%ebx)
+; X86-BMI-NEXT:    movl %ecx, 8(%ebx)
+; X86-BMI-NEXT:    movl %ebp, 12(%ebx)
+; X86-BMI-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-BMI-NEXT:    movl %eax, 16(%ebx)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-BMI-NEXT:    movl %eax, 20(%ebx)
+; X86-BMI-NEXT:    movl %edi, 24(%ebx)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-BMI-NEXT:    movl %eax, 28(%ebx)
+; X86-BMI-NEXT:    jmp .LBB13_3
+; X86-BMI-NEXT:  .LBB13_2: # %identity
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl %edx, (%ebx)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl %edx, 4(%ebx)
+; X86-BMI-NEXT:    movl %ecx, 8(%ebx)
+; X86-BMI-NEXT:    movl %ebp, 12(%ebx)
+; X86-BMI-NEXT:    movl %eax, 16(%ebx)
+; X86-BMI-NEXT:    movl %edi, 20(%ebx)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, 24(%ebx)
+; X86-BMI-NEXT:    movl %esi, 28(%ebx)
+; X86-BMI-NEXT:  .LBB13_3: # %identity
+; X86-BMI-NEXT:    movl %ebx, %eax
+; X86-BMI-NEXT:    addl $12, %esp
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    popl %ebp
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_v4i64_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %edi, %edi
+; X64-NOAVX2-NEXT:    je .LBB13_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    pcmpeqd %xmm4, %xmm4
+; X64-NOAVX2-NEXT:    pxor %xmm4, %xmm3
+; X64-NOAVX2-NEXT:    pxor %xmm4, %xmm2
+; X64-NOAVX2-NEXT:    pand %xmm2, %xmm0
+; X64-NOAVX2-NEXT:    pand %xmm3, %xmm1
+; X64-NOAVX2-NEXT:  .LBB13_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_v4i64_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %edi, %edi
+; X64-AVX2-NEXT:    je .LBB13_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
+; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:  .LBB13_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor <4 x i64> %m, splat (i64 -1)
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <4 x i64> %x, %a
+  ret <4 x i64> %masked
+
+identity:
+  ret <4 x i64> %x
+}
+
+define <8 x i8> @and_sink_not_splat_v8i8(<8 x i8> %x, i8 %m, i1 zeroext %cond) nounwind { ; AND of %x with a splat of NOT %m (splat is the first AND operand) - the NOT is computed in the entry block but only used in %mask
+; X86-LABEL: and_sink_not_splat_v8i8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB14_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movb %dl, %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    notb %dl
+; X86-NEXT:    andb %dl, %ch
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    andb %dl, %ch
+; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    andb %dl, %ch
+; X86-NEXT:    andb %dl, %dh
+; X86-NEXT:    andb %dl, %bl
+; X86-NEXT:    andb %dl, %bh
+; X86-NEXT:    andb %dl, %cl
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    movb %dl, (%eax)
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movb %bh, 2(%eax)
+; X86-NEXT:    movb %bl, 3(%eax)
+; X86-NEXT:    movb %dh, 4(%eax)
+; X86-NEXT:    movb %ch, 5(%eax)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 7(%eax)
+; X86-NEXT:    jmp .LBB14_3
+; X86-NEXT:  .LBB14_2: # %identity
+; X86-NEXT:    movb %ch, (%eax)
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movb %bh, 2(%eax)
+; X86-NEXT:    movb %bl, 3(%eax)
+; X86-NEXT:    movb %dh, 4(%eax)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %cl, 5(%eax)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movb %dl, 7(%eax)
+; X86-NEXT:  .LBB14_3: # %identity
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB14_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movb %dl, %ch
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-SSE-NEXT:    notb %dl
+; X86-SSE-NEXT:    andb %dl, %ch
+; X86-SSE-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    andb %dl, %ch
+; X86-SSE-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    andb %dl, %ch
+; X86-SSE-NEXT:    andb %dl, %dh
+; X86-SSE-NEXT:    andb %dl, %bl
+; X86-SSE-NEXT:    andb %dl, %bh
+; X86-SSE-NEXT:    andb %dl, %cl
+; X86-SSE-NEXT:    andb {{[0-9]+}}(%esp), %dl
+; X86-SSE-NEXT:    movb %dl, (%eax)
+; X86-SSE-NEXT:    movb %cl, 1(%eax)
+; X86-SSE-NEXT:    movb %bh, 2(%eax)
+; X86-SSE-NEXT:    movb %bl, 3(%eax)
+; X86-SSE-NEXT:    movb %dh, 4(%eax)
+; X86-SSE-NEXT:    movb %ch, 5(%eax)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 6(%eax)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 7(%eax)
+; X86-SSE-NEXT:    jmp .LBB14_3
+; X86-SSE-NEXT:  .LBB14_2: # %identity
+; X86-SSE-NEXT:    movb %ch, (%eax)
+; X86-SSE-NEXT:    movb %cl, 1(%eax)
+; X86-SSE-NEXT:    movb %bh, 2(%eax)
+; X86-SSE-NEXT:    movb %bl, 3(%eax)
+; X86-SSE-NEXT:    movb %dh, 4(%eax)
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb %cl, 5(%eax)
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb %cl, 6(%eax)
+; X86-SSE-NEXT:    movb %dl, 7(%eax)
+; X86-SSE-NEXT:  .LBB14_3: # %identity
+; X86-SSE-NEXT:    addl $4, %esp
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v8i8:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB14_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    notb %al
+; X86-SSE2-NEXT:    movzbl %al, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X86-SSE2-NEXT:    pand %xmm0, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB14_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v8i8:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB14_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movb %dl, %ch
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI-NEXT:    notb %dl
+; X86-BMI-NEXT:    andb %dl, %ch
+; X86-BMI-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    andb %dl, %ch
+; X86-BMI-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    andb %dl, %ch
+; X86-BMI-NEXT:    andb %dl, %dh
+; X86-BMI-NEXT:    andb %dl, %bl
+; X86-BMI-NEXT:    andb %dl, %bh
+; X86-BMI-NEXT:    andb %dl, %cl
+; X86-BMI-NEXT:    andb {{[0-9]+}}(%esp), %dl
+; X86-BMI-NEXT:    movb %dl, (%eax)
+; X86-BMI-NEXT:    movb %cl, 1(%eax)
+; X86-BMI-NEXT:    movb %bh, 2(%eax)
+; X86-BMI-NEXT:    movb %bl, 3(%eax)
+; X86-BMI-NEXT:    movb %dh, 4(%eax)
+; X86-BMI-NEXT:    movb %ch, 5(%eax)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 6(%eax)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 7(%eax)
+; X86-BMI-NEXT:    jmp .LBB14_3
+; X86-BMI-NEXT:  .LBB14_2: # %identity
+; X86-BMI-NEXT:    movb %ch, (%eax)
+; X86-BMI-NEXT:    movb %cl, 1(%eax)
+; X86-BMI-NEXT:    movb %bh, 2(%eax)
+; X86-BMI-NEXT:    movb %bl, 3(%eax)
+; X86-BMI-NEXT:    movb %dh, 4(%eax)
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb %cl, 5(%eax)
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb %cl, 6(%eax)
+; X86-BMI-NEXT:    movb %dl, 7(%eax)
+; X86-BMI-NEXT:  .LBB14_3: # %identity
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v8i8:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB14_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notb %dil
+; X64-NOAVX2-NEXT:    movzbl %dil, %eax
+; X64-NOAVX2-NEXT:    movd %eax, %xmm1
+; X64-NOAVX2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-NOAVX2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X64-NOAVX2-NEXT:    pand %xmm0, %xmm1
+; X64-NOAVX2-NEXT:    movdqa %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB14_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v8i8:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB14_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notb %dil
+; X64-AVX2-NEXT:    vmovd %edi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT:  .LBB14_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i8 %m, -1 ; bitwise NOT of the scalar %m
+  %head = insertelement <8 x i8> poison, i8 %a, i8 0 ; place the inverted scalar in lane 0
+  %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into all 8 lanes
+  br i1 %cond, label %mask, label %identity ; %splat is consumed only on the %mask path
+
+mask:
+  %masked = and <8 x i8> %splat, %x ; inverted splat is the first AND operand here
+  ret <8 x i8> %masked
+
+identity:
+  ret <8 x i8> %x ; %x is returned unmodified when %cond is false
+}
+
+define <8 x i8> @and_sink_not_splat_v8i8_swapped(<8 x i8> %x, i8 %m, i1 zeroext %cond) nounwind { ; AND of %x with a splat of NOT %m (splat is the second AND operand) - the NOT is computed in the entry block but only used in %mask
+; X86-LABEL: and_sink_not_splat_v8i8_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB15_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    notb %ch
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    andb %ch, %dh
+; X86-NEXT:    andb %ch, %bl
+; X86-NEXT:    andb %ch, %bh
+; X86-NEXT:    andb %ch, %cl
+; X86-NEXT:    andb %ch, %dl
+; X86-NEXT:    movb %dl, (%eax)
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movb %bh, 2(%eax)
+; X86-NEXT:    movb %bl, 3(%eax)
+; X86-NEXT:    movb %dh, 4(%eax)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 5(%eax)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb %cl, 7(%eax)
+; X86-NEXT:    jmp .LBB15_3
+; X86-NEXT:  .LBB15_2: # %identity
+; X86-NEXT:    movb %dl, (%eax)
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movb %bh, 2(%eax)
+; X86-NEXT:    movb %bl, 3(%eax)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %cl, 4(%eax)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %cl, 5(%eax)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movb %dh, 7(%eax)
+; X86-NEXT:  .LBB15_3: # %identity
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v8i8_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB15_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-SSE-NEXT:    notb %ch
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SSE-NEXT:    andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-SSE-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-SSE-NEXT:    andb %ch, %dh
+; X86-SSE-NEXT:    andb %ch, %bl
+; X86-SSE-NEXT:    andb %ch, %bh
+; X86-SSE-NEXT:    andb %ch, %cl
+; X86-SSE-NEXT:    andb %ch, %dl
+; X86-SSE-NEXT:    movb %dl, (%eax)
+; X86-SSE-NEXT:    movb %cl, 1(%eax)
+; X86-SSE-NEXT:    movb %bh, 2(%eax)
+; X86-SSE-NEXT:    movb %bl, 3(%eax)
+; X86-SSE-NEXT:    movb %dh, 4(%eax)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 5(%eax)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 6(%eax)
+; X86-SSE-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SSE-NEXT:    movb %cl, 7(%eax)
+; X86-SSE-NEXT:    jmp .LBB15_3
+; X86-SSE-NEXT:  .LBB15_2: # %identity
+; X86-SSE-NEXT:    movb %dl, (%eax)
+; X86-SSE-NEXT:    movb %cl, 1(%eax)
+; X86-SSE-NEXT:    movb %bh, 2(%eax)
+; X86-SSE-NEXT:    movb %bl, 3(%eax)
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb %cl, 4(%eax)
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb %cl, 5(%eax)
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movb %cl, 6(%eax)
+; X86-SSE-NEXT:    movb %dh, 7(%eax)
+; X86-SSE-NEXT:  .LBB15_3: # %identity
+; X86-SSE-NEXT:    addl $4, %esp
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v8i8_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB15_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    notb %al
+; X86-SSE2-NEXT:    movzbl %al, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB15_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v8i8_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %bh
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB15_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI-NEXT:    notb %ch
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-BMI-NEXT:    andb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-BMI-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-BMI-NEXT:    andb %ch, %dh
+; X86-BMI-NEXT:    andb %ch, %bl
+; X86-BMI-NEXT:    andb %ch, %bh
+; X86-BMI-NEXT:    andb %ch, %cl
+; X86-BMI-NEXT:    andb %ch, %dl
+; X86-BMI-NEXT:    movb %dl, (%eax)
+; X86-BMI-NEXT:    movb %cl, 1(%eax)
+; X86-BMI-NEXT:    movb %bh, 2(%eax)
+; X86-BMI-NEXT:    movb %bl, 3(%eax)
+; X86-BMI-NEXT:    movb %dh, 4(%eax)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 5(%eax)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 6(%eax)
+; X86-BMI-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-BMI-NEXT:    movb %cl, 7(%eax)
+; X86-BMI-NEXT:    jmp .LBB15_3
+; X86-BMI-NEXT:  .LBB15_2: # %identity
+; X86-BMI-NEXT:    movb %dl, (%eax)
+; X86-BMI-NEXT:    movb %cl, 1(%eax)
+; X86-BMI-NEXT:    movb %bh, 2(%eax)
+; X86-BMI-NEXT:    movb %bl, 3(%eax)
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb %cl, 4(%eax)
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb %cl, 5(%eax)
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movb %cl, 6(%eax)
+; X86-BMI-NEXT:    movb %dh, 7(%eax)
+; X86-BMI-NEXT:  .LBB15_3: # %identity
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v8i8_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB15_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notb %dil
+; X64-NOAVX2-NEXT:    movzbl %dil, %eax
+; X64-NOAVX2-NEXT:    movd %eax, %xmm1
+; X64-NOAVX2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-NOAVX2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X64-NOAVX2-NEXT:    pand %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB15_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v8i8_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB15_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notb %dil
+; X64-AVX2-NEXT:    vmovd %edi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:  .LBB15_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i8 %m, -1 ; bitwise NOT of the scalar %m
+  %head = insertelement <8 x i8> poison, i8 %a, i8 0 ; place the inverted scalar in lane 0
+  %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into all 8 lanes
+  br i1 %cond, label %mask, label %identity ; %splat is consumed only on the %mask path
+
+mask:
+  %masked = and <8 x i8> %x, %splat ; operands swapped - inverted splat is the second AND operand
+  ret <8 x i8> %masked
+
+identity:
+  ret <8 x i8> %x ; %x is returned unmodified when %cond is false
+}
+
+define <4 x i32> @and_sink_not_splat_v4i32(<4 x i32> %x, i32 %m, i1 zeroext %cond) nounwind { ; AND of %x with a splat of NOT %m (splat is the first AND operand) - the NOT is computed in the entry block but only used in %mask
+; X86-LABEL: and_sink_not_splat_v4i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB16_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %ecx
+; X86-NEXT:    andl %ebx, %edx
+; X86-NEXT:    andl %ebx, %esi
+; X86-NEXT:    andl %edi, %ebx
+; X86-NEXT:    movl %ebx, (%eax)
+; X86-NEXT:    jmp .LBB16_3
+; X86-NEXT:  .LBB16_2: # %identity
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:  .LBB16_3: # %identity
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $32, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB16_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    notl %ebx
+; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, (%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    andps %xmm0, %xmm1
+; X86-SSE-NEXT:    movaps %xmm1, (%eax)
+; X86-SSE-NEXT:    jmp .LBB16_3
+; X86-SSE-NEXT:  .LBB16_2: # %identity
+; X86-SSE-NEXT:    movl %edi, (%eax)
+; X86-SSE-NEXT:    movl %esi, 4(%eax)
+; X86-SSE-NEXT:    movl %edx, 8(%eax)
+; X86-SSE-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE-NEXT:  .LBB16_3: # %identity
+; X86-SSE-NEXT:    addl $32, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v4i32:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB16_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    notl %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE2-NEXT:    pand %xmm0, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB16_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v4i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB16_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %ecx
+; X86-BMI-NEXT:    andl %ebx, %edx
+; X86-BMI-NEXT:    andl %ebx, %esi
+; X86-BMI-NEXT:    andl %edi, %ebx
+; X86-BMI-NEXT:    movl %ebx, (%eax)
+; X86-BMI-NEXT:    jmp .LBB16_3
+; X86-BMI-NEXT:  .LBB16_2: # %identity
+; X86-BMI-NEXT:    movl %edi, (%eax)
+; X86-BMI-NEXT:  .LBB16_3: # %identity
+; X86-BMI-NEXT:    movl %esi, 4(%eax)
+; X86-BMI-NEXT:    movl %edx, 8(%eax)
+; X86-BMI-NEXT:    movl %ecx, 12(%eax)
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v4i32:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB16_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notl %edi
+; X64-NOAVX2-NEXT:    movd %edi, %xmm1
+; X64-NOAVX2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X64-NOAVX2-NEXT:    pand %xmm0, %xmm1
+; X64-NOAVX2-NEXT:    movdqa %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB16_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v4i32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB16_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notl %edi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; X64-AVX2-NEXT:  .LBB16_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i32 %m, -1 ; bitwise NOT of the scalar %m
+  %head = insertelement <4 x i32> poison, i32 %a, i32 0 ; place the inverted scalar in lane 0
+  %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into all 4 lanes
+  br i1 %cond, label %mask, label %identity ; %splat is consumed only on the %mask path
+
+mask:
+  %masked = and <4 x i32> %splat, %x ; inverted splat is the first AND operand here
+  ret <4 x i32> %masked
+
+identity:
+  ret <4 x i32> %x ; %x is returned unmodified when %cond is false
+}
+
+define <4 x i32> @and_sink_not_splat_v4i32_swapped(<4 x i32> %x, i32 %m, i1 zeroext %cond) nounwind {
+; X86-LABEL: and_sink_not_splat_v4i32_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB17_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl %ebx, %ecx
+; X86-NEXT:    andl %ebx, %edx
+; X86-NEXT:    andl %ebx, %esi
+; X86-NEXT:    andl %ebx, %edi
+; X86-NEXT:  .LBB17_2: # %identity
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v4i32_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $32, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB17_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    notl %ebx
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %edi, (%esp)
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    andps %xmm0, %xmm1
+; X86-SSE-NEXT:    movaps %xmm1, (%eax)
+; X86-SSE-NEXT:    jmp .LBB17_3
+; X86-SSE-NEXT:  .LBB17_2: # %identity
+; X86-SSE-NEXT:    movl %edi, (%eax)
+; X86-SSE-NEXT:    movl %esi, 4(%eax)
+; X86-SSE-NEXT:    movl %edx, 8(%eax)
+; X86-SSE-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE-NEXT:  .LBB17_3: # %identity
+; X86-SSE-NEXT:    addl $32, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v4i32_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB17_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    notl %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:  .LBB17_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v4i32_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB17_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    notl %ebx
+; X86-BMI-NEXT:    andl %ebx, %ecx
+; X86-BMI-NEXT:    andl %ebx, %edx
+; X86-BMI-NEXT:    andl %ebx, %esi
+; X86-BMI-NEXT:    andl %ebx, %edi
+; X86-BMI-NEXT:  .LBB17_2: # %identity
+; X86-BMI-NEXT:    movl %edi, (%eax)
+; X86-BMI-NEXT:    movl %esi, 4(%eax)
+; X86-BMI-NEXT:    movl %edx, 8(%eax)
+; X86-BMI-NEXT:    movl %ecx, 12(%eax)
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v4i32_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB17_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notl %edi
+; X64-NOAVX2-NEXT:    movd %edi, %xmm1
+; X64-NOAVX2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X64-NOAVX2-NEXT:    pand %xmm1, %xmm0
+; X64-NOAVX2-NEXT:  .LBB17_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v4i32_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB17_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notl %edi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:  .LBB17_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i32 %m, -1
+  %head = insertelement <4 x i32> poison, i32 %a, i32 0
+  %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <4 x i32> %x, %splat
+  ret <4 x i32> %masked
+
+identity:
+  ret <4 x i32> %x
+}
+
+define <4 x i64> @and_sink_not_splat_v4i64(<4 x i64> %x, i64 %m, i1 zeroext %cond) nounwind { ; returns %cond ? (splat(~%m) & %x) : %x
+; X86-LABEL: and_sink_not_splat_v4i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    je .LBB18_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andl %ecx, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl %ecx, %esi
+; X86-NEXT:    andl %ecx, %ebx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andl %edx, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    andl %edx, %edi
+; X86-NEXT:    andl %edx, %ebp
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    movl %edi, 16(%eax)
+; X86-NEXT:    movl %esi, 20(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    jmp .LBB18_3
+; X86-NEXT:  .LBB18_2: # %identity
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    movl %edi, 16(%eax)
+; X86-NEXT:    movl %esi, 20(%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl %edx, 28(%eax)
+; X86-NEXT:  .LBB18_3: # %identity
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v4i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $8, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    je .LBB18_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    notl %ecx
+; X86-SSE-NEXT:    andl %ecx, %edx
+; X86-SSE-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    andl %ecx, %esi
+; X86-SSE-NEXT:    andl %ecx, %ebx
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    notl %edx
+; X86-SSE-NEXT:    andl %edx, (%esp) # 4-byte Folded Spill
+; X86-SSE-NEXT:    andl %edx, %edi
+; X86-SSE-NEXT:    andl %edx, %ebp
+; X86-SSE-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl %edx, (%eax)
+; X86-SSE-NEXT:    movl %ecx, 4(%eax)
+; X86-SSE-NEXT:    movl %ebp, 8(%eax)
+; X86-SSE-NEXT:    movl %ebx, 12(%eax)
+; X86-SSE-NEXT:    movl %edi, 16(%eax)
+; X86-SSE-NEXT:    movl %esi, 20(%eax)
+; X86-SSE-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-SSE-NEXT:    movl %ecx, 24(%eax)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE-NEXT:    movl %ecx, 28(%eax)
+; X86-SSE-NEXT:    jmp .LBB18_3
+; X86-SSE-NEXT:  .LBB18_2: # %identity
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, (%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, 4(%eax)
+; X86-SSE-NEXT:    movl %ebp, 8(%eax)
+; X86-SSE-NEXT:    movl %ebx, 12(%eax)
+; X86-SSE-NEXT:    movl %edi, 16(%eax)
+; X86-SSE-NEXT:    movl %esi, 20(%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, 24(%eax)
+; X86-SSE-NEXT:    movl %edx, 28(%eax)
+; X86-SSE-NEXT:  .LBB18_3: # %identity
+; X86-SSE-NEXT:    addl $8, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v4i64:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB18_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
+; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE2-NEXT:    pxor %xmm3, %xmm2
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pand %xmm1, %xmm2
+; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE2-NEXT:  .LBB18_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v4i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebp
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    subl $8, %esp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    je .LBB18_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andl %ecx, %edx
+; X86-BMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    andl %ecx, %esi
+; X86-BMI-NEXT:    andl %ecx, %ebx
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    notl %edx
+; X86-BMI-NEXT:    andl %edx, (%esp) # 4-byte Folded Spill
+; X86-BMI-NEXT:    andl %edx, %edi
+; X86-BMI-NEXT:    andl %edx, %ebp
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl %edx, (%eax)
+; X86-BMI-NEXT:    movl %ecx, 4(%eax)
+; X86-BMI-NEXT:    movl %ebp, 8(%eax)
+; X86-BMI-NEXT:    movl %ebx, 12(%eax)
+; X86-BMI-NEXT:    movl %edi, 16(%eax)
+; X86-BMI-NEXT:    movl %esi, 20(%eax)
+; X86-BMI-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-BMI-NEXT:    movl %ecx, 24(%eax)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-BMI-NEXT:    movl %ecx, 28(%eax)
+; X86-BMI-NEXT:    jmp .LBB18_3
+; X86-BMI-NEXT:  .LBB18_2: # %identity
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, (%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, 4(%eax)
+; X86-BMI-NEXT:    movl %ebp, 8(%eax)
+; X86-BMI-NEXT:    movl %ebx, 12(%eax)
+; X86-BMI-NEXT:    movl %edi, 16(%eax)
+; X86-BMI-NEXT:    movl %esi, 20(%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, 24(%eax)
+; X86-BMI-NEXT:    movl %edx, 28(%eax)
+; X86-BMI-NEXT:  .LBB18_3: # %identity
+; X86-BMI-NEXT:    addl $8, %esp
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    popl %ebp
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v4i64:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB18_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notq %rdi
+; X64-NOAVX2-NEXT:    movq %rdi, %xmm2
+; X64-NOAVX2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
+; X64-NOAVX2-NEXT:    pand %xmm2, %xmm0
+; X64-NOAVX2-NEXT:    pand %xmm2, %xmm1
+; X64-NOAVX2-NEXT:  .LBB18_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v4i64:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB18_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notq %rdi
+; X64-AVX2-NEXT:    vmovq %rdi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT:  .LBB18_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i64 %m, -1 ; %a = ~%m, computed in the entry block rather than in %mask
+  %head = insertelement <4 x i64> poison, i64 %a, i64 0 ; lane 0 = ~%m
+  %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero shuffle mask broadcasts lane 0 to every lane
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <4 x i64> %splat, %x ; the only use of %splat; the inverted splat is the first operand
+  ret <4 x i64> %masked
+
+identity:
+  ret <4 x i64> %x ; %cond is false: %x is returned unchanged
+}
+
+define <4 x i64> @and_sink_not_splat_v4i64_swapped(<4 x i64> %x, i64 %m, i1 zeroext %cond) nounwind { ; returns %cond ? (%x & splat(~%m)) : %x
+; X86-LABEL: and_sink_not_splat_v4i64_swapped:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    je .LBB19_2
+; X86-NEXT:  # %bb.1: # %mask
+; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %esi, (%esp) # 4-byte Folded Spill
+; X86-NEXT:    andl %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl %esi, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    andl %esi, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    andl %esi, %ebp
+; X86-NEXT:    andl %esi, %edx
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl %ebx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl %edi, 24(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    jmp .LBB19_3
+; X86-NEXT:  .LBB19_2: # %identity
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    movl %edi, 20(%eax)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl %esi, 28(%eax)
+; X86-NEXT:  .LBB19_3: # %identity
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X86-SSE-LABEL: and_sink_not_splat_v4i64_swapped:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    pushl %ebx
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SSE-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    je .LBB19_2
+; X86-SSE-NEXT:  # %bb.1: # %mask
+; X86-SSE-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    andl %esi, (%esp) # 4-byte Folded Spill
+; X86-SSE-NEXT:    andl %esi, %edi
+; X86-SSE-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    andl %esi, %ebx
+; X86-SSE-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE-NEXT:    andl %esi, %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    notl %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    andl %esi, %edi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-SSE-NEXT:    andl %esi, %ebx
+; X86-SSE-NEXT:    andl %esi, %ebp
+; X86-SSE-NEXT:    andl %esi, %edx
+; X86-SSE-NEXT:    movl %edx, (%eax)
+; X86-SSE-NEXT:    movl %ecx, 4(%eax)
+; X86-SSE-NEXT:    movl %ebp, 8(%eax)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE-NEXT:    movl %ebx, 16(%eax)
+; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE-NEXT:    movl %ecx, 20(%eax)
+; X86-SSE-NEXT:    movl %edi, 24(%eax)
+; X86-SSE-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-SSE-NEXT:    movl %ecx, 28(%eax)
+; X86-SSE-NEXT:    jmp .LBB19_3
+; X86-SSE-NEXT:  .LBB19_2: # %identity
+; X86-SSE-NEXT:    movl %edx, (%eax)
+; X86-SSE-NEXT:    movl %ecx, 4(%eax)
+; X86-SSE-NEXT:    movl %ebp, 8(%eax)
+; X86-SSE-NEXT:    movl %ebx, 12(%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, 16(%eax)
+; X86-SSE-NEXT:    movl %edi, 20(%eax)
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl %ecx, 24(%eax)
+; X86-SSE-NEXT:    movl %esi, 28(%eax)
+; X86-SSE-NEXT:  .LBB19_3: # %identity
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    popl %ebx
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: and_sink_not_splat_v4i64_swapped:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    je .LBB19_2
+; X86-SSE2-NEXT:  # %bb.1: # %mask
+; X86-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
+; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
+; X86-SSE2-NEXT:    pand %xmm3, %xmm0
+; X86-SSE2-NEXT:    pand %xmm3, %xmm1
+; X86-SSE2-NEXT:  .LBB19_2: # %identity
+; X86-SSE2-NEXT:    retl
+;
+; X86-BMI-LABEL: and_sink_not_splat_v4i64_swapped:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %ebp
+; X86-BMI-NEXT:    pushl %ebx
+; X86-BMI-NEXT:    pushl %edi
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    subl $12, %esp
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    je .LBB19_2
+; X86-BMI-NEXT:  # %bb.1: # %mask
+; X86-BMI-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    andl %esi, (%esp) # 4-byte Folded Spill
+; X86-BMI-NEXT:    andl %esi, %edi
+; X86-BMI-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    andl %esi, %ebx
+; X86-BMI-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI-NEXT:    andl %esi, %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI-NEXT:    notl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI-NEXT:    andl %esi, %edi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI-NEXT:    andl %esi, %ebx
+; X86-BMI-NEXT:    andl %esi, %ebp
+; X86-BMI-NEXT:    andl %esi, %edx
+; X86-BMI-NEXT:    movl %edx, (%eax)
+; X86-BMI-NEXT:    movl %ecx, 4(%eax)
+; X86-BMI-NEXT:    movl %ebp, 8(%eax)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-BMI-NEXT:    movl %ecx, 12(%eax)
+; X86-BMI-NEXT:    movl %ebx, 16(%eax)
+; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-BMI-NEXT:    movl %ecx, 20(%eax)
+; X86-BMI-NEXT:    movl %edi, 24(%eax)
+; X86-BMI-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-BMI-NEXT:    movl %ecx, 28(%eax)
+; X86-BMI-NEXT:    jmp .LBB19_3
+; X86-BMI-NEXT:  .LBB19_2: # %identity
+; X86-BMI-NEXT:    movl %edx, (%eax)
+; X86-BMI-NEXT:    movl %ecx, 4(%eax)
+; X86-BMI-NEXT:    movl %ebp, 8(%eax)
+; X86-BMI-NEXT:    movl %ebx, 12(%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, 16(%eax)
+; X86-BMI-NEXT:    movl %edi, 20(%eax)
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl %ecx, 24(%eax)
+; X86-BMI-NEXT:    movl %esi, 28(%eax)
+; X86-BMI-NEXT:  .LBB19_3: # %identity
+; X86-BMI-NEXT:    addl $12, %esp
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    popl %edi
+; X86-BMI-NEXT:    popl %ebx
+; X86-BMI-NEXT:    popl %ebp
+; X86-BMI-NEXT:    retl $4
+;
+; X64-NOAVX2-LABEL: and_sink_not_splat_v4i64_swapped:
+; X64-NOAVX2:       # %bb.0:
+; X64-NOAVX2-NEXT:    testl %esi, %esi
+; X64-NOAVX2-NEXT:    je .LBB19_2
+; X64-NOAVX2-NEXT:  # %bb.1: # %mask
+; X64-NOAVX2-NEXT:    notq %rdi
+; X64-NOAVX2-NEXT:    movq %rdi, %xmm2
+; X64-NOAVX2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
+; X64-NOAVX2-NEXT:    pand %xmm2, %xmm0
+; X64-NOAVX2-NEXT:    pand %xmm2, %xmm1
+; X64-NOAVX2-NEXT:  .LBB19_2: # %identity
+; X64-NOAVX2-NEXT:    retq
+;
+; X64-AVX2-LABEL: and_sink_not_splat_v4i64_swapped:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    testl %esi, %esi
+; X64-AVX2-NEXT:    je .LBB19_2
+; X64-AVX2-NEXT:  # %bb.1: # %mask
+; X64-AVX2-NEXT:    notq %rdi
+; X64-AVX2-NEXT:    vmovq %rdi, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; X64-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:  .LBB19_2: # %identity
+; X64-AVX2-NEXT:    retq
+  %a = xor i64 %m, -1 ; %a = ~%m, computed in the entry block rather than in %mask
+  %head = insertelement <4 x i64> poison, i64 %a, i64 0 ; lane 0 = ~%m
+  %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero shuffle mask broadcasts lane 0 to every lane
+  br i1 %cond, label %mask, label %identity
+
+mask:
+  %masked = and <4 x i64> %x, %splat ; the only use of %splat; the inverted splat is the second operand (swapped vs. @and_sink_not_splat_v4i64)
+  ret <4 x i64> %masked
+
+identity:
+  ret <4 x i64> %x ; %cond is false: %x is returned unchanged
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64-BMI: {{.*}}
+; X64-NOBMI: {{.*}}



More information about the llvm-commits mailing list