[llvm] Test assert zext (PR #172138)

via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 13 04:30:24 PST 2025


https://github.com/actinks updated https://github.com/llvm/llvm-project/pull/172138

>From a80b806e05cf32ce5cb0426d08d0d439a14d6432 Mon Sep 17 00:00:00 2001
From: actink <actink at 163.com>
Date: Fri, 12 Dec 2025 23:08:25 +0800
Subject: [PATCH 1/4] [SDAG] fix missed optimization: shl nuw + zext adds
 unnecessary masking

close: #171750
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++-
 .../X86/fold-int-pow2-with-fmul-or-fdiv.ll    |  4 --
 llvm/test/CodeGen/X86/known-never-zero.ll     |  4 --
 llvm/test/CodeGen/X86/known-pow2.ll           |  3 +-
 llvm/test/CodeGen/X86/pr171750.ll             | 47 +++++++++++++++++++
 llvm/test/CodeGen/X86/pr89877.ll              |  2 -
 6 files changed, 52 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr171750.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6a99d4e29b64f..a0212774d8f17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1673,8 +1673,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     SDLoc DL(Op);
     SDValue N1 = Op.getOperand(1);
-    SDValue RV =
-        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+    SDValue POp = DAG.getNode(Opc, DL, PVT, N0, N1);
+    if (Opc == ISD::SHL && Op->getFlags().hasNoUnsignedWrap())
+      POp = DAG.getNode(ISD::AssertZext, DL, PVT, POp, DAG.getValueType(VT));
+    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, POp);
 
     if (Replace)
       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index b655bda68f906..dea6b47a5961a 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1529,7 +1529,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-SSE-NEXT:    movl $1, %eax
 ; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-SSE-NEXT:    shll %cl, %eax
-; CHECK-SSE-NEXT:    movzwl %ax, %eax
 ; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
 ; CHECK-SSE-NEXT:    callq __truncsfhf2 at PLT
 ; CHECK-SSE-NEXT:    callq __extendhfsf2 at PLT
@@ -1547,7 +1546,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-AVX2-NEXT:    movl $1, %eax
 ; CHECK-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-AVX2-NEXT:    shll %cl, %eax
-; CHECK-AVX2-NEXT:    movzwl %ax, %eax
 ; CHECK-AVX2-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2 at PLT
 ; CHECK-AVX2-NEXT:    callq __extendhfsf2 at PLT
@@ -1563,7 +1561,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-ONLY-AVX512F-NEXT:    movl $1, %eax
 ; CHECK-ONLY-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-ONLY-AVX512F-NEXT:    shll %cl, %eax
-; CHECK-ONLY-AVX512F-NEXT:    movzwl %ax, %eax
 ; CHECK-ONLY-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
 ; CHECK-ONLY-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; CHECK-ONLY-AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
@@ -1576,7 +1573,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-SKX:       # %bb.0:
 ; CHECK-SKX-NEXT:    movl $1, %eax
 ; CHECK-SKX-NEXT:    shlxl %edi, %eax, %eax
-; CHECK-SKX-NEXT:    movzwl %ax, %eax
 ; CHECK-SKX-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
 ; CHECK-SKX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; CHECK-SKX-NEXT:    vcvtph2ps %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 63336ffa7c6c8..758ebc6177f14 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1318,7 +1318,6 @@ define i32 @zext_known_nonzero(i16 %xx) {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $256, %eax # imm = 0x100
 ; X86-NEXT:    shll %cl, %eax
-; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    rep bsfl %eax, %eax
 ; X86-NEXT:    retl
 ;
@@ -1328,7 +1327,6 @@ define i32 @zext_known_nonzero(i16 %xx) {
 ; X64-NEXT:    movl $256, %eax # imm = 0x100
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    rep bsfl %eax, %eax
 ; X64-NEXT:    retq
   %x = shl nuw nsw i16 256, %xx
@@ -1363,7 +1361,6 @@ define i32 @sext_known_nonzero(i16 %xx) {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $256, %eax # imm = 0x100
 ; X86-NEXT:    shll %cl, %eax
-; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    rep bsfl %eax, %eax
 ; X86-NEXT:    retl
 ;
@@ -1373,7 +1370,6 @@ define i32 @sext_known_nonzero(i16 %xx) {
 ; X64-NEXT:    movl $256, %eax # imm = 0x100
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    rep bsfl %eax, %eax
 ; X64-NEXT:    retq
   %x = shl nuw nsw i16 256, %xx
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 019bca7e53b4c..09ceaf8eca3ac 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -847,8 +847,7 @@ define i1 @pow2_though_zext(i32 %x, i16 %y) {
 ; CHECK-NEXT:    movl $4, %eax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shll %cl, %eax
-; CHECK-NEXT:    andl %edi, %eax
-; CHECK-NEXT:    testl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT:    testl %eax, %edi
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %dd = shl nuw nsw i16 4, %y
diff --git a/llvm/test/CodeGen/X86/pr171750.ll b/llvm/test/CodeGen/X86/pr171750.ll
new file mode 100644
index 0000000000000..b70536ee215d3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr171750.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
+
+define i32 @_Z1ft(i16 zeroext %0) {
+; X86-LABEL: _Z1ft:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: _Z1ft:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (,%rdi,8), %eax
+; X64-NEXT:    retq
+entry:
+  %3 = shl nuw i16 %0, 3
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i32 @_Z1gt(i16 zeroext %x) {
+; X86-LABEL: _Z1gt:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (,%eax,8), %ecx
+; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: _Z1gt:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (,%rdi,8), %eax
+; X64-NEXT:    shll $16, %edi
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    retq
+entry:
+  %conv = zext nneg i16 %x to i32
+  %shl = shl nuw i16 %x, 3
+  %conv3 = zext i16 %shl to i32
+  %shl5 = shl nuw nsw i32 %conv, 16
+  %or = or disjoint i32 %shl5, %conv3
+  ret i32 %or
+}
+
diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll
index a40ad8f941278..eeb10edb1002f 100644
--- a/llvm/test/CodeGen/X86/pr89877.ll
+++ b/llvm/test/CodeGen/X86/pr89877.ll
@@ -86,7 +86,6 @@ define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $256, %eax # imm = 0x100
 ; X86-NEXT:    shll %cl, %eax
-; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    rep bsfl %eax, %eax
 ; X86-NEXT:    retl
 ;
@@ -96,7 +95,6 @@ define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
 ; X64-NEXT:    movl $256, %eax # imm = 0x100
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    rep bsfl %eax, %eax
 ; X64-NEXT:    retq
   %x = shl nuw nsw i16 256, %xx

>From dadbde145a39e14dbb2b5ec870fe99b73e3f97cb Mon Sep 17 00:00:00 2001
From: actink <actink at 163.com>
Date: Sat, 13 Dec 2025 11:49:43 +0800
Subject: [PATCH 2/4] fixup shl promotion, add trunc flags

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +++++++++++++------
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  3 +++
 .../CodeGen/X86/{pr171750.ll => pr172046.ll}  | 14 +++++++------
 llvm/test/CodeGen/X86/pr89877.ll              |  2 --
 4 files changed, 25 insertions(+), 14 deletions(-)
 rename llvm/test/CodeGen/X86/{pr171750.ll => pr172046.ll} (72%)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a0212774d8f17..666b52be42b72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1659,24 +1659,32 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
 
+    SDNodeFlags Flags;
     bool Replace = false;
     SDValue N0 = Op.getOperand(0);
     if (Opc == ISD::SRA)
       N0 = SExtPromoteOperand(N0, PVT);
     else if (Opc == ISD::SRL)
       N0 = ZExtPromoteOperand(N0, PVT);
-    else
-      N0 = PromoteOperand(N0, PVT, Replace);
+    else {
+      if (Op->getFlags().hasNoUnsignedWrap()) {
+        Flags = SDNodeFlags::NoUnsignedWrap;
+        N0 = ZExtPromoteOperand(N0, PVT);
+      } else if (Op->getFlags().hasNoSignedWrap()) {
+        Flags = SDNodeFlags::NoSignedWrap;
+        N0 = SExtPromoteOperand(N0, PVT);
+      } else
+        N0 = PromoteOperand(N0, PVT, Replace);
+    }
 
     if (!N0.getNode())
       return SDValue();
 
     SDLoc DL(Op);
     SDValue N1 = Op.getOperand(1);
-    SDValue POp = DAG.getNode(Opc, DL, PVT, N0, N1);
-    if (Opc == ISD::SHL && Op->getFlags().hasNoUnsignedWrap())
-      POp = DAG.getNode(ISD::AssertZext, DL, PVT, POp, DAG.getValueType(VT));
-    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, POp);
+
+    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT,
+                             DAG.getNode(Opc, DL, PVT, N0, N1), Flags);
 
     if (Replace)
       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cbe3236eb80d9..7795a004d658d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1572,6 +1572,9 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
   assert(VT.bitsLE(OpVT) && "Not extending!");
   if (OpVT == VT)
     return Op;
+  if (Op.getOpcode() == ISD::AssertZext &&
+      cast<VTSDNode>(Op.getOperand(1))->getVT().bitsLE(VT))
+    return Op;
   APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
                                    VT.getScalarSizeInBits());
   return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
diff --git a/llvm/test/CodeGen/X86/pr171750.ll b/llvm/test/CodeGen/X86/pr172046.ll
similarity index 72%
rename from llvm/test/CodeGen/X86/pr171750.ll
rename to llvm/test/CodeGen/X86/pr172046.ll
index b70536ee215d3..df8c82adb3a59 100644
--- a/llvm/test/CodeGen/X86/pr171750.ll
+++ b/llvm/test/CodeGen/X86/pr172046.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
 
 define i32 @_Z1ft(i16 zeroext %0) {
 ; X86-LABEL: _Z1ft:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: _Z1ft:
@@ -24,17 +25,18 @@ define i32 @_Z1gt(i16 zeroext %x) {
 ; X86-LABEL: _Z1gt:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (,%eax,8), %ecx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $8191, %ecx # imm = 0x1FFF
 ; X86-NEXT:    shll $16, %eax
-; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    leal (%eax,%ecx,8), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: _Z1gt:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,8), %eax
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $16, %edi
-; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    leal (%rdi,%rax,8), %eax
 ; X64-NEXT:    retq
 entry:
   %conv = zext nneg i16 %x to i32
diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll
index eeb10edb1002f..ed85e7fe9fb60 100644
--- a/llvm/test/CodeGen/X86/pr89877.ll
+++ b/llvm/test/CodeGen/X86/pr89877.ll
@@ -61,7 +61,6 @@ define i32 @sext_known_nonzero_nsw(i16 %xx) {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $256, %eax # imm = 0x100
 ; X86-NEXT:    shll %cl, %eax
-; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    rep bsfl %eax, %eax
 ; X86-NEXT:    retl
 ;
@@ -71,7 +70,6 @@ define i32 @sext_known_nonzero_nsw(i16 %xx) {
 ; X64-NEXT:    movl $256, %eax # imm = 0x100
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    rep bsfl %eax, %eax
 ; X64-NEXT:    retq
   %x = shl nsw i16 256, %xx

>From 04ad5874467c469de77c7a4fa4ffc36b85b17ab0 Mon Sep 17 00:00:00 2001
From: actink <actink at 163.com>
Date: Sat, 13 Dec 2025 16:08:30 +0800
Subject: [PATCH 3/4] test assert vs trunc flags

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 666b52be42b72..e0998cfd096ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1659,7 +1659,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
 
-    SDNodeFlags Flags;
     bool Replace = false;
     SDValue N0 = Op.getOperand(0);
     if (Opc == ISD::SRA)
@@ -1667,13 +1666,11 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
     else if (Opc == ISD::SRL)
       N0 = ZExtPromoteOperand(N0, PVT);
     else {
-      if (Op->getFlags().hasNoUnsignedWrap()) {
-        Flags = SDNodeFlags::NoUnsignedWrap;
+      if (Op->getFlags().hasNoUnsignedWrap())
         N0 = ZExtPromoteOperand(N0, PVT);
-      } else if (Op->getFlags().hasNoSignedWrap()) {
-        Flags = SDNodeFlags::NoSignedWrap;
+      else if (Op->getFlags().hasNoSignedWrap())
         N0 = SExtPromoteOperand(N0, PVT);
-      } else
+      else
         N0 = PromoteOperand(N0, PVT, Replace);
     }
 
@@ -1682,9 +1679,14 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     SDLoc DL(Op);
     SDValue N1 = Op.getOperand(1);
-
-    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT,
-                             DAG.getNode(Opc, DL, PVT, N0, N1), Flags);
+    SDValue POp = DAG.getNode(Opc, DL, PVT, N0, N1);
+    if (Opc == ISD::SRL ||
+        (Opc == ISD::SHL && Op->getFlags().hasNoUnsignedWrap()))
+      POp = DAG.getNode(ISD::AssertZext, DL, PVT, POp, DAG.getValueType(VT));
+    else if (Opc == ISD::SRA ||
+             (Opc == ISD::SHL && Op->getFlags().hasNoSignedWrap()))
+      POp = DAG.getNode(ISD::AssertSext, DL, PVT, POp, DAG.getValueType(VT));
+    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, POp);
 
     if (Replace)
       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

>From e51edf39bc528f002aa0acb2b32971ec955090ed Mon Sep 17 00:00:00 2001
From: actink <actink at 163.com>
Date: Sat, 13 Dec 2025 20:29:53 +0800
Subject: [PATCH 4/4] update failed cases

---
 .../CodeGen/X86/2008-05-12-tailmerge-5.ll     |   4 +-
 llvm/test/CodeGen/X86/andnot-patterns.ll      |  44 ++-
 ...d_vector_inreg_of_broadcast_from_memory.ll | 290 +++++++++---------
 llvm/test/CodeGen/X86/atomic-rm-bit-test.ll   |   2 +
 llvm/test/CodeGen/X86/avgceils-scalar.ll      |   4 +
 llvm/test/CodeGen/X86/avgfloors-scalar.ll     |  36 ++-
 llvm/test/CodeGen/X86/bitreverse.ll           |  49 ++-
 llvm/test/CodeGen/X86/buildvec-insertvec.ll   |   4 +-
 llvm/test/CodeGen/X86/combine-sdiv.ll         |   2 +-
 .../CodeGen/X86/const-shift-of-constmasked.ll |  28 +-
 llvm/test/CodeGen/X86/ctpop-mask.ll           |   8 +-
 llvm/test/CodeGen/X86/dagcombine-shifts.ll    |  15 +-
 llvm/test/CodeGen/X86/extract-store.ll        |   6 +-
 .../CodeGen/X86/field-extract-use-trunc.ll    |   5 +-
 llvm/test/CodeGen/X86/fixup-bw-copy.ll        |  30 +-
 llvm/test/CodeGen/X86/h-register-store.ll     |  69 +++++
 llvm/test/CodeGen/X86/h-registers-0.ll        |  27 +-
 llvm/test/CodeGen/X86/h-registers-3.ll        |   8 +-
 llvm/test/CodeGen/X86/i16lshr8pat.ll          |   9 +
 .../test/CodeGen/X86/ins_subreg_coalesce-2.ll |   5 +-
 llvm/test/CodeGen/X86/isel-shift.ll           |   8 +-
 llvm/test/CodeGen/X86/known-signbits-shl.ll   |   2 +-
 .../test/CodeGen/X86/load-scalar-as-vector.ll |   4 +-
 llvm/test/CodeGen/X86/masked_compressstore.ll |  18 +-
 llvm/test/CodeGen/X86/masked_store.ll         |  54 ++--
 llvm/test/CodeGen/X86/masked_store_trunc.ll   |  90 ++++--
 .../CodeGen/X86/masked_store_trunc_ssat.ll    |  90 ++++--
 .../CodeGen/X86/masked_store_trunc_usat.ll    |  90 ++++--
 llvm/test/CodeGen/X86/parity-vec.ll           |  18 +-
 llvm/test/CodeGen/X86/popcnt.ll               |  74 ++---
 llvm/test/CodeGen/X86/pr172046.ll             |  14 +-
 llvm/test/CodeGen/X86/pr32420.ll              |   4 +-
 llvm/test/CodeGen/X86/pr44915.ll              |  15 +-
 llvm/test/CodeGen/X86/pr77459.ll              |  18 +-
 llvm/test/CodeGen/X86/rotate-extract.ll       |  28 +-
 llvm/test/CodeGen/X86/sdiv_fix.ll             |  14 +-
 llvm/test/CodeGen/X86/sdiv_fix_sat.ll         |   6 +-
 llvm/test/CodeGen/X86/select-smin-smax.ll     |   2 +-
 llvm/test/CodeGen/X86/select-sra.ll           |   2 +-
 llvm/test/CodeGen/X86/setcc.ll                |  23 +-
 llvm/test/CodeGen/X86/shift-mask.ll           |   6 +-
 llvm/test/CodeGen/X86/smax.ll                 |   6 +-
 llvm/test/CodeGen/X86/smin.ll                 |   6 +-
 llvm/test/CodeGen/X86/smul_fix.ll             |  22 +-
 llvm/test/CodeGen/X86/smul_fix_sat.ll         |  24 +-
 .../CodeGen/X86/speculative-load-hardening.ll |  16 +-
 llvm/test/CodeGen/X86/sshl_sat.ll             |   8 +-
 llvm/test/CodeGen/X86/udiv_fix.ll             |   8 +-
 llvm/test/CodeGen/X86/udiv_fix_sat.ll         |   8 +-
 llvm/test/CodeGen/X86/umax.ll                 |   6 +-
 llvm/test/CodeGen/X86/umin.ll                 |   6 +-
 llvm/test/CodeGen/X86/umul_fix_sat.ll         |  14 +-
 llvm/test/CodeGen/X86/ushl_sat.ll             |   8 +-
 llvm/test/CodeGen/X86/vector-bitreverse.ll    |  38 ++-
 ...d_vector_inreg_of_broadcast_from_memory.ll | 106 +++----
 55 files changed, 860 insertions(+), 641 deletions(-)

diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 0d63779227554..f2ed96a659246 100644
--- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -34,14 +34,14 @@ define void @passing2(i64 %str.0, i64 %str.1, i16 signext  %s, i32 %j, i8 signex
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    shll $7, %eax
 ; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    shrl $7, %eax
+; CHECK-NEXT:    sarl $7, %eax
 ; CHECK-NEXT:    cmpw {{[0-9]+}}(%rsp), %ax
 ; CHECK-NEXT:    jne LBB0_6
 ; CHECK-NEXT:  ## %bb.3: ## %bb51
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    shll $7, %eax
 ; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    shrl $7, %eax
+; CHECK-NEXT:    sarl $7, %eax
 ; CHECK-NEXT:    cmpw {{[0-9]+}}(%rsp), %ax
 ; CHECK-NEXT:    jne LBB0_6
 ; CHECK-NEXT:  ## %bb.4: ## %bb67
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index fc573fbd4fc99..3f181d4e9180d 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -960,9 +960,9 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: andnot_bitreverse_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    rolw $8, %cx
+; X86-NEXT:    movzwl %cx, %eax
 ; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
 ; X86-NEXT:    shll $4, %ecx
 ; X86-NEXT:    shrl $4, %eax
@@ -985,19 +985,18 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i16:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NOBMI-NEXT:    rolw $8, %si
-; X64-NOBMI-NEXT:    movl %esi, %eax
-; X64-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X64-NOBMI-NEXT:    shll $4, %eax
-; X64-NOBMI-NEXT:    shrl $4, %esi
+; X64-NOBMI-NEXT:    movzwl %si, %eax
 ; X64-NOBMI-NEXT:    andl $3855, %esi # imm = 0xF0F
-; X64-NOBMI-NEXT:    orl %eax, %esi
-; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    shll $4, %esi
+; X64-NOBMI-NEXT:    shrl $4, %eax
+; X64-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NOBMI-NEXT:    orl %esi, %eax
+; X64-NOBMI-NEXT:    movl %eax, %ecx
+; X64-NOBMI-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X64-NOBMI-NEXT:    shrl $2, %eax
 ; X64-NOBMI-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-NOBMI-NEXT:    shrl $2, %esi
-; X64-NOBMI-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-NOBMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NOBMI-NEXT:    leal (%rax,%rcx,4), %eax
 ; X64-NOBMI-NEXT:    movl %eax, %ecx
 ; X64-NOBMI-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; X64-NOBMI-NEXT:    shrl %eax
@@ -1010,19 +1009,18 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ;
 ; X64-BMI-LABEL: andnot_bitreverse_i16:
 ; X64-BMI:       # %bb.0:
-; X64-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-BMI-NEXT:    rolw $8, %si
-; X64-BMI-NEXT:    movl %esi, %eax
-; X64-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X64-BMI-NEXT:    shll $4, %eax
-; X64-BMI-NEXT:    shrl $4, %esi
+; X64-BMI-NEXT:    movzwl %si, %eax
 ; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
-; X64-BMI-NEXT:    orl %eax, %esi
-; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    shll $4, %esi
+; X64-BMI-NEXT:    shrl $4, %eax
+; X64-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-BMI-NEXT:    orl %esi, %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-BMI-NEXT:    shrl $2, %esi
-; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    leal (%rax,%rcx,4), %eax
 ; X64-BMI-NEXT:    movl %eax, %ecx
 ; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; X64-BMI-NEXT:    shrl %eax
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 49eb82e8434cf..0cf42e721a306 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -5002,203 +5002,197 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
 ;
 ; AVX1-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    pushq %rbp
 ; AVX1-NEXT:    pushq %rbx
-; AVX1-NEXT:    movq 16(%rdi), %rcx
-; AVX1-NEXT:    movq %rcx, %rax
-; AVX1-NEXT:    movq %rcx, %r8
-; AVX1-NEXT:    movq %rcx, %r9
-; AVX1-NEXT:    movq %rcx, %r10
-; AVX1-NEXT:    movl %ecx, %r11d
-; AVX1-NEXT:    movl %ecx, %ebx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    shrl $8, %ecx
+; AVX1-NEXT:    movq %rdx, %r9
+; AVX1-NEXT:    movq 16(%rdi), %rax
+; AVX1-NEXT:    movq %rax, %r8
+; AVX1-NEXT:    movq %rax, %rbx
+; AVX1-NEXT:    movzbl %ah, %ecx
+; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $16, %ebx
-; AVX1-NEXT:    vpinsrb $2, %ebx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $24, %r11d
-; AVX1-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $32, %r10
-; AVX1-NEXT:    vpinsrb $4, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $40, %r9
-; AVX1-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
+; AVX1-NEXT:    movl %eax, %edx
+; AVX1-NEXT:    movq %rax, %r10
+; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX1-NEXT:    shrl $16, %eax
+; AVX1-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    movq (%rdi), %rcx
+; AVX1-NEXT:    shrl $24, %edx
+; AVX1-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; AVX1-NEXT:    movq 24(%rdi), %rdx
+; AVX1-NEXT:    shrq $32, %rbx
+; AVX1-NEXT:    vpinsrb $4, %ebx, %xmm0, %xmm0
+; AVX1-NEXT:    movzbl %bh, %eax
+; AVX1-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    movzbl %dh, %eax
+; AVX1-NEXT:    shrq $56, %r10
 ; AVX1-NEXT:    shrq $48, %r8
 ; AVX1-NEXT:    vpinsrb $6, %r8d, %xmm0, %xmm0
-; AVX1-NEXT:    movq 24(%rdi), %rcx
-; AVX1-NEXT:    shrq $56, %rax
-; AVX1-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %eax
-; AVX1-NEXT:    shrl $8, %eax
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $7, %r10d, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
 ; AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    movl %edx, %eax
 ; AVX1-NEXT:    shrl $16, %eax
 ; AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    movl %edx, %eax
 ; AVX1-NEXT:    shrl $24, %eax
 ; AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    movzbl %ch, %eax
+; AVX1-NEXT:    vmovd %ecx, %xmm1
+; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    shrl $16, %eax
+; AVX1-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    shrl $24, %eax
+; AVX1-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    movq %rcx, %r8
+; AVX1-NEXT:    movq %rcx, %r10
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    shrq $32, %rax
+; AVX1-NEXT:    movzbl %ah, %ebp
+; AVX1-NEXT:    movq %rdx, %rcx
+; AVX1-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    movq %rdx, %rax
+; AVX1-NEXT:    movq 8(%rdi), %rbx
+; AVX1-NEXT:    vpinsrb $5, %ebp, %xmm1, %xmm1
+; AVX1-NEXT:    movzbl %bh, %ebp
+; AVX1-NEXT:    shrq $56, %r10
+; AVX1-NEXT:    shrq $48, %r8
+; AVX1-NEXT:    shrq $32, %rax
 ; AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rax
-; AVX1-NEXT:    shrq $40, %rax
-; AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rcx, %rax
-; AVX1-NEXT:    shrq $48, %rax
-; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    movq (%rdi), %rax
-; AVX1-NEXT:    shrq $56, %rcx
-; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $8, %ecx
-; AVX1-NEXT:    vmovd %eax, %xmm1
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $24, %ecx
-; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $32, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $40, %rcx
-; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $48, %rcx
-; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq 8(%rdi), %rcx
-; AVX1-NEXT:    shrq $56, %rax
-; AVX1-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movl %ecx, %eax
-; AVX1-NEXT:    shrl $8, %eax
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    movzbl %ah, %edi
+; AVX1-NEXT:    vpinsrb $6, %r8d, %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrb $7, %r10d, %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrb $8, %ebx, %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrb $9, %ebp, %xmm1, %xmm1
+; AVX1-NEXT:    movl %ebx, %eax
 ; AVX1-NEXT:    shrl $16, %eax
 ; AVX1-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    movl %ebx, %eax
 ; AVX1-NEXT:    shrl $24, %eax
 ; AVX1-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rcx, %rax
-; AVX1-NEXT:    shrq $32, %rax
-; AVX1-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rcx, %rax
-; AVX1-NEXT:    shrq $40, %rax
-; AVX1-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rcx, %rax
+; AVX1-NEXT:    movq %rbx, %rax
+; AVX1-NEXT:    movq %rbx, %r8
+; AVX1-NEXT:    shrq $32, %rbx
+; AVX1-NEXT:    movzbl %bh, %ebp
+; AVX1-NEXT:    vpinsrb $12, %ebx, %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrb $13, %edi, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
+; AVX1-NEXT:    shrq $48, %rcx
+; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    shrq $56, %rdx
+; AVX1-NEXT:    vpinsrb $15, %edx, %xmm0, %xmm0
 ; AVX1-NEXT:    shrq $48, %rax
 ; AVX1-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    shrq $56, %rcx
-; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm1, %xmm1
+; AVX1-NEXT:    shrq $56, %r8
+; AVX1-NEXT:    vpinsrb $15, %r8d, %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb 48(%rsi), %xmm0, %xmm2
 ; AVX1-NEXT:    vpaddb 32(%rsi), %xmm1, %xmm3
 ; AVX1-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdx)
-; AVX1-NEXT:    vmovdqa %xmm0, 16(%rdx)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdx)
-; AVX1-NEXT:    vmovdqa %xmm2, 48(%rdx)
+; AVX1-NEXT:    vmovdqa %xmm1, (%r9)
+; AVX1-NEXT:    vmovdqa %xmm0, 16(%r9)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%r9)
+; AVX1-NEXT:    vmovdqa %xmm2, 48(%r9)
 ; AVX1-NEXT:    popq %rbx
+; AVX1-NEXT:    popq %rbp
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    pushq %rbp
 ; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    movq 16(%rdi), %rcx
-; AVX2-NEXT:    movq %rcx, %rax
-; AVX2-NEXT:    movq %rcx, %r8
-; AVX2-NEXT:    movq %rcx, %r9
-; AVX2-NEXT:    movq %rcx, %r10
-; AVX2-NEXT:    movl %ecx, %r11d
-; AVX2-NEXT:    movl %ecx, %ebx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    shrl $8, %ecx
+; AVX2-NEXT:    movq %rdx, %r9
+; AVX2-NEXT:    movq 16(%rdi), %rax
+; AVX2-NEXT:    movq %rax, %r8
+; AVX2-NEXT:    movq %rax, %rbx
+; AVX2-NEXT:    movzbl %ah, %ecx
+; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $16, %ebx
-; AVX2-NEXT:    vpinsrb $2, %ebx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $24, %r11d
-; AVX2-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $32, %r10
-; AVX2-NEXT:    vpinsrb $4, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $40, %r9
-; AVX2-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
+; AVX2-NEXT:    movl %eax, %edx
+; AVX2-NEXT:    movq %rax, %r10
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    shrl $16, %eax
+; AVX2-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    movq (%rdi), %rcx
+; AVX2-NEXT:    shrl $24, %edx
+; AVX2-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; AVX2-NEXT:    movq 24(%rdi), %rdx
+; AVX2-NEXT:    shrq $32, %rbx
+; AVX2-NEXT:    vpinsrb $4, %ebx, %xmm0, %xmm0
+; AVX2-NEXT:    movzbl %bh, %eax
+; AVX2-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    movzbl %dh, %eax
+; AVX2-NEXT:    shrq $56, %r10
 ; AVX2-NEXT:    shrq $48, %r8
 ; AVX2-NEXT:    vpinsrb $6, %r8d, %xmm0, %xmm0
-; AVX2-NEXT:    movq 24(%rdi), %rcx
-; AVX2-NEXT:    shrq $56, %rax
-; AVX2-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %eax
-; AVX2-NEXT:    shrl $8, %eax
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $7, %r10d, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
 ; AVX2-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    movl %edx, %eax
 ; AVX2-NEXT:    shrl $16, %eax
 ; AVX2-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    movl %edx, %eax
 ; AVX2-NEXT:    shrl $24, %eax
 ; AVX2-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    movzbl %ch, %eax
+; AVX2-NEXT:    vmovd %ecx, %xmm1
+; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $16, %eax
+; AVX2-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $24, %eax
+; AVX2-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    movq %rcx, %r8
+; AVX2-NEXT:    movq %rcx, %r10
 ; AVX2-NEXT:    movq %rcx, %rax
 ; AVX2-NEXT:    shrq $32, %rax
+; AVX2-NEXT:    movzbl %ah, %ebp
+; AVX2-NEXT:    movq %rdx, %rcx
+; AVX2-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    movq %rdx, %rax
+; AVX2-NEXT:    movq 8(%rdi), %rbx
+; AVX2-NEXT:    vpinsrb $5, %ebp, %xmm1, %xmm1
+; AVX2-NEXT:    movzbl %bh, %ebp
+; AVX2-NEXT:    shrq $56, %r10
+; AVX2-NEXT:    shrq $48, %r8
+; AVX2-NEXT:    shrq $32, %rax
 ; AVX2-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rax
-; AVX2-NEXT:    shrq $40, %rax
-; AVX2-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rcx, %rax
-; AVX2-NEXT:    shrq $48, %rax
-; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    movq (%rdi), %rax
-; AVX2-NEXT:    shrq $56, %rcx
-; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $8, %ecx
-; AVX2-NEXT:    vmovd %eax, %xmm1
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $16, %ecx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $24, %ecx
-; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $32, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $40, %rcx
-; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $48, %rcx
-; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq 8(%rdi), %rcx
-; AVX2-NEXT:    shrq $56, %rax
-; AVX2-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movl %ecx, %eax
-; AVX2-NEXT:    shrl $8, %eax
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    movzbl %ah, %edi
+; AVX2-NEXT:    vpinsrb $6, %r8d, %xmm1, %xmm1
+; AVX2-NEXT:    vpinsrb $7, %r10d, %xmm1, %xmm1
+; AVX2-NEXT:    vpinsrb $8, %ebx, %xmm1, %xmm1
+; AVX2-NEXT:    vpinsrb $9, %ebp, %xmm1, %xmm1
+; AVX2-NEXT:    movl %ebx, %eax
 ; AVX2-NEXT:    shrl $16, %eax
 ; AVX2-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    movl %ebx, %eax
 ; AVX2-NEXT:    shrl $24, %eax
 ; AVX2-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rcx, %rax
-; AVX2-NEXT:    shrq $32, %rax
-; AVX2-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rcx, %rax
-; AVX2-NEXT:    shrq $40, %rax
-; AVX2-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rcx, %rax
+; AVX2-NEXT:    movq %rbx, %rax
+; AVX2-NEXT:    movq %rbx, %r8
+; AVX2-NEXT:    shrq $32, %rbx
+; AVX2-NEXT:    movzbl %bh, %ebp
+; AVX2-NEXT:    vpinsrb $12, %ebx, %xmm1, %xmm1
+; AVX2-NEXT:    vpinsrb $13, %edi, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
+; AVX2-NEXT:    shrq $48, %rcx
+; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    shrq $56, %rdx
+; AVX2-NEXT:    vpinsrb $15, %edx, %xmm0, %xmm0
 ; AVX2-NEXT:    shrq $48, %rax
 ; AVX2-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    shrq $56, %rcx
-; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm1, %xmm1
+; AVX2-NEXT:    shrq $56, %r8
+; AVX2-NEXT:    vpinsrb $15, %r8d, %xmm1, %xmm1
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpaddb 32(%rsi), %ymm0, %ymm1
 ; AVX2-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)
-; AVX2-NEXT:    vmovdqa %ymm1, 32(%rdx)
+; AVX2-NEXT:    vmovdqa %ymm0, (%r9)
+; AVX2-NEXT:    vmovdqa %ymm1, 32(%r9)
 ; AVX2-NEXT:    popq %rbx
+; AVX2-NEXT:    popq %rbp
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
index 71887e369bd18..ffad77c16aa64 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -3581,6 +3581,7 @@ define zeroext i16 @atomic_shl1_or_16_const_valnz(ptr %v) nounwind {
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    jne .LBB55_1
 ; X86-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    shrl $4, %eax
 ; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -3599,6 +3600,7 @@ define zeroext i16 @atomic_shl1_or_16_const_valnz(ptr %v) nounwind {
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    jne .LBB55_1
 ; X64-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    shrl $4, %eax
 ; X64-NEXT:    andl $1, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/avgceils-scalar.ll b/llvm/test/CodeGen/X86/avgceils-scalar.ll
index 91121bd4ad935..c6371ed0e5a8b 100644
--- a/llvm/test/CodeGen/X86/avgceils-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceils-scalar.ll
@@ -14,6 +14,7 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
@@ -23,6 +24,7 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X64-NEXT:    movsbl %sil, %eax
 ; X64-NEXT:    movsbl %dil, %ecx
 ; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -39,6 +41,7 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
@@ -48,6 +51,7 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X64-NEXT:    movsbl %sil, %eax
 ; X64-NEXT:    movsbl %dil, %ecx
 ; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index b575d34a8c2dd..582cd9619eb88 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -13,18 +13,20 @@
 define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_fixed_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movzwl %cx, %eax
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsbl %sil, %ecx
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    movzwl %cx, %eax
 ; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -38,18 +40,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_lsb_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movzwl %cx, %eax
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsbl %sil, %ecx
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    movzwl %cx, %eax
 ; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -65,18 +69,20 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_ext_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movzwl %cx, %eax
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsbl %sil, %ecx
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    movzwl %cx, %eax
 ; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index d92e1a1e7b9d4..39985a90d5f84 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -14,9 +14,9 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
 ; X86-LABEL: test_bitreverse_v2i16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    rolw $8, %dx
+; X86-NEXT:    movzwl %dx, %eax
 ; X86-NEXT:    andl $3855, %edx # imm = 0xF0F
 ; X86-NEXT:    shll $4, %edx
 ; X86-NEXT:    shrl $4, %eax
@@ -33,17 +33,17 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
 ; X86-NEXT:    andl $21845, %eax # imm = 0x5555
 ; X86-NEXT:    leal (%eax,%edx,2), %eax
 ; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $3855, %edx # imm = 0xF0F
-; X86-NEXT:    shll $4, %edx
-; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    movzwl %cx, %edx
 ; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; X86-NEXT:    orl %edx, %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $13107, %edx # imm = 0x3333
-; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %edx
+; X86-NEXT:    andl $3855, %edx # imm = 0xF0F
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    andl $13107, %edx # imm = 0x3333
+; X86-NEXT:    leal (%edx,%ecx,4), %ecx
 ; X86-NEXT:    movl %ecx, %edx
 ; X86-NEXT:    andl $21845, %edx # imm = 0x5555
 ; X86-NEXT:    shrl %ecx
@@ -354,9 +354,9 @@ declare i16 @llvm.bitreverse.i16(i16) readnone
 define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ; X86-LABEL: test_bitreverse_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    rolw $8, %cx
+; X86-NEXT:    movzwl %cx, %eax
 ; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
 ; X86-NEXT:    shll $4, %ecx
 ; X86-NEXT:    shrl $4, %eax
@@ -377,19 +377,18 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; X64-LABEL: test_bitreverse_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    rolw $8, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    andl $3855, %edi # imm = 0xF0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $4, %edi
+; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NEXT:    orl %edi, %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X64-NEXT:    shrl $2, %eax
 ; X64-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $13107, %edi # imm = 0x3333
-; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    leal (%rax,%rcx,4), %eax
 ; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; X64-NEXT:    shrl %eax
diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index 4b0e5441b4abf..ae70b6a5a4665 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -726,9 +726,9 @@ define void @PR46461(i16 %x, ptr %y) {
 ; SSE-LABEL: PR46461:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    shrl %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; SSE-NEXT:    psrld $1, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, 48(%rsi)
 ; SSE-NEXT:    movdqa %xmm0, 32(%rsi)
 ; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
@@ -738,9 +738,9 @@ define void @PR46461(i16 %x, ptr %y) {
 ; AVX1-LABEL: PR46461:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    movzwl %di, %eax
+; AVX1-NEXT:    shrl %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovaps %ymm0, 32(%rsi)
 ; AVX1-NEXT:    vmovaps %ymm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index f7baee9c8e99e..11d6795474c62 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -3134,7 +3134,7 @@ define i16 @combine_i16_sdiv_pow2(i16 %x) {
 ; CHECK-NEXT:    testw %di, %di
 ; CHECK-NEXT:    cmovnsl %edi, %eax
 ; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    shrl $4, %eax
+; CHECK-NEXT:    sarl $4, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %1 = sdiv i16 %x, 16
diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
index 142ac754c3f7e..cb972a14c63b1 100644
--- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
@@ -582,7 +582,7 @@ define i16 @test_i16_2032_mask_lshr_4(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_4:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $4, %eax
 ; X64-NEXT:    andl $127, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -602,7 +602,7 @@ define i16 @test_i16_2032_mask_lshr_5(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $5, %eax
 ; X64-NEXT:    andl $63, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -622,7 +622,7 @@ define i16 @test_i16_2032_mask_lshr_6(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_lshr_6:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $6, %eax
 ; X64-NEXT:    andl $31, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -763,7 +763,7 @@ define i16 @test_i16_2032_mask_ashr_4(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_4:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $4, %eax
 ; X64-NEXT:    andl $127, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -783,7 +783,7 @@ define i16 @test_i16_2032_mask_ashr_5(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $5, %eax
 ; X64-NEXT:    andl $63, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -803,7 +803,7 @@ define i16 @test_i16_2032_mask_ashr_6(i16 %a0) {
 ;
 ; X64-LABEL: test_i16_2032_mask_ashr_6:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    shrl $6, %eax
 ; X64-NEXT:    andl $31, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -819,7 +819,7 @@ define i16 @test_i16_65024_mask_ashr_1(i16 %a0) {
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
@@ -827,7 +827,7 @@ define i16 @test_i16_65024_mask_ashr_1(i16 %a0) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $65024, %edi # imm = 0xFE00
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %t0 = and i16 %a0, 65024
@@ -840,7 +840,7 @@ define i16 @test_i16_65024_mask_ashr_8(i16 %a0) {
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    andl $65024, %eax # imm = 0xFE00
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
@@ -848,7 +848,7 @@ define i16 @test_i16_65024_mask_ashr_8(i16 %a0) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $65024, %edi # imm = 0xFE00
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    sarl $8, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %t0 = and i16 %a0, 65024
@@ -859,14 +859,14 @@ define i16 @test_i16_65024_mask_ashr_9(i16 %a0) {
 ; X86-LABEL: test_i16_65024_mask_ashr_9:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $9, %eax
+; X86-NEXT:    sarl $9, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_9:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    shrl $9, %eax
+; X64-NEXT:    sarl $9, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %t0 = and i16 %a0, 65024
@@ -877,14 +877,14 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
 ; X86-LABEL: test_i16_65024_mask_ashr_10:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $10, %eax
+; X86-NEXT:    sarl $10, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i16_65024_mask_ashr_10:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    shrl $10, %eax
+; X64-NEXT:    sarl $10, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %t0 = and i16 %a0, 65024
diff --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll
index a43dba94d30c7..61ecebd0d3aaf 100644
--- a/llvm/test/CodeGen/X86/ctpop-mask.ll
+++ b/llvm/test/CodeGen/X86/ctpop-mask.ll
@@ -150,6 +150,8 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone {
 ; X86-NO-POPCOUNT:       # %bb.0:
 ; X86-NO-POPCOUNT-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NO-POPCOUNT-NEXT:    andl $14, %ecx
+; X86-NO-POPCOUNT-NEXT:    shrl %ecx
+; X86-NO-POPCOUNT-NEXT:    addl %ecx, %ecx
 ; X86-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
 ; X86-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NO-POPCOUNT-NEXT:    shrl %cl, %eax
@@ -159,8 +161,10 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone {
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
 ; X64-NO-POPCOUNT:       # %bb.0:
-; X64-NO-POPCOUNT-NEXT:    movl %edi, %ecx
-; X64-NO-POPCOUNT-NEXT:    andl $14, %ecx
+; X64-NO-POPCOUNT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NO-POPCOUNT-NEXT:    andl $14, %edi
+; X64-NO-POPCOUNT-NEXT:    shrl %edi
+; X64-NO-POPCOUNT-NEXT:    leal (%rdi,%rdi), %ecx
 ; X64-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
 ; X64-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NO-POPCOUNT-NEXT:    shrl %cl, %eax
diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
index 19b9452e7117e..ef8ec45b841b3 100644
--- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
@@ -159,14 +159,18 @@ define i64 @fun8(i16 zeroext %v) {
 ; X86-LABEL: fun8:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl $1048560, %eax # imm = 0xFFFF0
+; X86-NEXT:    sarl $4, %eax
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fun8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    andl $1048560, %eax # imm = 0xFFFF0
+; X64-NEXT:    sarl $4, %eax
+; X64-NEXT:    movzwl %ax, %eax
+; X64-NEXT:    shll $4, %eax
 ; X64-NEXT:    retq
 entry:
   %shr = ashr i16 %v, 4
@@ -233,10 +237,10 @@ entry:
 define i64 @fun11(i16 zeroext %v) {
 ; X86-LABEL: fun11:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    shrl $4, %ecx
-; X86-NEXT:    andl $-16, %eax
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    retl
@@ -246,6 +250,7 @@ define i64 @fun11(i16 zeroext %v) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    movl %eax, %eax
 ; X64-NEXT:    andl $-16, %edi
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll
index 255ea44e520c0..47bf7b39829da 100644
--- a/llvm/test/CodeGen/X86/extract-store.ll
+++ b/llvm/test/CodeGen/X86/extract-store.ll
@@ -95,13 +95,15 @@ define void @extract_i8_15(ptr nocapture %dst, <16 x i8> %foo) nounwind {
 ; SSE2-X86:       # %bb.0:
 ; SSE2-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SSE2-X86-NEXT:    pextrw $7, %xmm0, %ecx
-; SSE2-X86-NEXT:    movb %ch, (%eax)
+; SSE2-X86-NEXT:    shrl $8, %ecx
+; SSE2-X86-NEXT:    movb %cl, (%eax)
 ; SSE2-X86-NEXT:    retl
 ;
 ; SSE2-X64-LABEL: extract_i8_15:
 ; SSE2-X64:       # %bb.0:
 ; SSE2-X64-NEXT:    pextrw $7, %xmm0, %eax
-; SSE2-X64-NEXT:    movb %ah, (%rdi)
+; SSE2-X64-NEXT:    shrl $8, %eax
+; SSE2-X64-NEXT:    movb %al, (%rdi)
 ; SSE2-X64-NEXT:    retq
 ;
 ; SSE41-X86-LABEL: extract_i8_15:
diff --git a/llvm/test/CodeGen/X86/field-extract-use-trunc.ll b/llvm/test/CodeGen/X86/field-extract-use-trunc.ll
index b9721d2491054..1e7bcbce404ac 100644
--- a/llvm/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/llvm/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -81,8 +81,9 @@ define i16 @test5(i16 %f12) nounwind {
 ;
 ; x86_64-LABEL: test5:
 ; x86_64:       # %bb.0:
-; x86_64-NEXT:    shrl $6, %edi
-; x86_64-NEXT:    movsbl %dil, %eax
+; x86_64-NEXT:    movzwl %di, %eax
+; x86_64-NEXT:    shrl $6, %eax
+; x86_64-NEXT:    movsbl %al, %eax
 ; x86_64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; x86_64-NEXT:    retq
 	%f11 = shl i16 %f12, 2
diff --git a/llvm/test/CodeGen/X86/fixup-bw-copy.ll b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
index 2af90469f4cce..4342898895477 100644
--- a/llvm/test/CodeGen/X86/fixup-bw-copy.ll
+++ b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
@@ -76,19 +76,29 @@ define i16 @test_movw(i16 %a0) {
 define i8 @test_movb_hreg(i16 %a0) {
 ; X64-LABEL: test_movb_hreg:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movzbl %ch, %eax
+; X64-NEXT:    addb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
-; X86-LABEL: test_movb_hreg:
-; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addb %al, %ah
-; X86-NEXT:    movb %ah, %al
-; X86-NEXT:    retl
+; X86-BWON-LABEL: test_movb_hreg:
+; X86-BWON:       # %bb.0:
+; X86-BWON-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BWON-NEXT:    movl %eax, %ecx
+; X86-BWON-NEXT:    shrl $8, %eax
+; X86-BWON-NEXT:    addb %cl, %al
+; X86-BWON-NEXT:    # kill: def $al killed $al killed $eax
+; X86-BWON-NEXT:    retl
+;
+; X86-BWOFF-LABEL: test_movb_hreg:
+; X86-BWOFF:       # %bb.0:
+; X86-BWOFF-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BWOFF-NEXT:    movb %al, %cl
+; X86-BWOFF-NEXT:    shrl $8, %eax
+; X86-BWOFF-NEXT:    addb %cl, %al
+; X86-BWOFF-NEXT:    # kill: def $al killed $al killed $eax
+; X86-BWOFF-NEXT:    retl
   %tmp0 = trunc i16 %a0 to i8
   %tmp1 = lshr i16 %a0, 8
   %tmp2 = trunc i16 %tmp1 to i8
diff --git a/llvm/test/CodeGen/X86/h-register-store.ll b/llvm/test/CodeGen/X86/h-register-store.ll
index a13b43918b524..bc80232684f73 100644
--- a/llvm/test/CodeGen/X86/h-register-store.ll
+++ b/llvm/test/CodeGen/X86/h-register-store.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
 ; X64:      mov
 ; X64-NEXT: movb %ah, (%rsi)
@@ -37,18 +38,86 @@
 ; Use h-register extract and store.
 
 define void @foo16(i16 inreg %p, ptr inreg %z) nounwind {
+; X64-LABEL: foo16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzbl %ah, %eax
+; X64-NEXT:    movb %al, (%rsi)
+; X64-NEXT:    retq
+;
+; X32-LABEL: foo16:
+; X32:       # %bb.0:
+; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movzbl %ah, %eax
+; X32-NEXT:    movb %al, (%esi)
+; X32-NEXT:    retq
+;
+; W64-LABEL: foo16:
+; W64:       # %bb.0:
+; W64-NEXT:    movzwl %cx, %eax
+; W64-NEXT:    shrl $8, %eax
+; W64-NEXT:    movb %al, (%rdx)
+; W64-NEXT:    retq
+;
+; X86-LABEL: foo16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl %ah, %eax
+; X86-NEXT:    movb %al, (%edx)
+; X86-NEXT:    retl
   %q = lshr i16 %p, 8
   %t = trunc i16 %q to i8
   store i8 %t, ptr %z
   ret void
 }
 define void @foo32(i32 inreg %p, ptr inreg %z) nounwind {
+; X64-LABEL: foo32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movb %ah, (%rsi)
+; X64-NEXT:    retq
+;
+; X32-LABEL: foo32:
+; X32:       # %bb.0:
+; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movb %ah, (%esi)
+; X32-NEXT:    retq
+;
+; W64-LABEL: foo32:
+; W64:       # %bb.0:
+; W64-NEXT:    movb %ch, (%rdx)
+; W64-NEXT:    retq
+;
+; X86-LABEL: foo32:
+; X86:       # %bb.0:
+; X86-NEXT:    movb %ah, (%edx)
+; X86-NEXT:    retl
   %q = lshr i32 %p, 8
   %t = trunc i32 %q to i8
   store i8 %t, ptr %z
   ret void
 }
 define void @foo64(i64 inreg %p, ptr inreg %z) nounwind {
+; X64-LABEL: foo64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movb %ah, (%rsi)
+; X64-NEXT:    retq
+;
+; X32-LABEL: foo64:
+; X32:       # %bb.0:
+; X32-NEXT:    movq %rdi, %rax
+; X32-NEXT:    movb %ah, (%esi)
+; X32-NEXT:    retq
+;
+; W64-LABEL: foo64:
+; W64:       # %bb.0:
+; W64-NEXT:    movb %ch, (%rdx)
+; W64-NEXT:    retq
+;
+; X86-LABEL: foo64:
+; X86:       # %bb.0:
+; X86-NEXT:    movb %ah, (%ecx)
+; X86-NEXT:    retl
   %q = lshr i64 %p, 8
   %t = trunc i64 %q to i8
   store i8 %t, ptr %z
diff --git a/llvm/test/CodeGen/X86/h-registers-0.ll b/llvm/test/CodeGen/X86/h-registers-0.ll
index 76b0a34643d52..f8e57a2da1372 100644
--- a/llvm/test/CodeGen/X86/h-registers-0.ll
+++ b/llvm/test/CodeGen/X86/h-registers-0.ll
@@ -84,30 +84,33 @@ define void @bar32(i32 inreg %x, ptr inreg %p) nounwind {
 define void @bar16(i16 inreg %x, ptr inreg %p) nounwind {
 ; X64-LABEL: bar16:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrl $8, %edi
-; X64-NEXT:    incb %dil
-; X64-NEXT:    movb %dil, (%rsi)
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movzbl %ah, %eax
+; X64-NEXT:    incb %al
+; X64-NEXT:    movb %al, (%rsi)
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: bar16:
 ; X32:       # %bb.0:
-; X32-NEXT:    shrl $8, %edi
-; X32-NEXT:    incb %dil
-; X32-NEXT:    movb %dil, (%esi)
+; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movzbl %ah, %eax
+; X32-NEXT:    incb %al
+; X32-NEXT:    movb %al, (%esi)
 ; X32-NEXT:    retq
 ;
 ; WIN64-LABEL: bar16:
 ; WIN64:       # %bb.0:
-; WIN64-NEXT:    # kill: def $cx killed $cx def $ecx
-; WIN64-NEXT:    shrl $8, %ecx
-; WIN64-NEXT:    incb %cl
-; WIN64-NEXT:    movb %cl, (%rdx)
+; WIN64-NEXT:    movzwl %cx, %eax
+; WIN64-NEXT:    shrl $8, %eax
+; WIN64-NEXT:    incb %al
+; WIN64-NEXT:    movb %al, (%rdx)
 ; WIN64-NEXT:    retq
 ;
 ; X86-32-LABEL: bar16:
 ; X86-32:       # %bb.0:
-; X86-32-NEXT:    incb %ah
-; X86-32-NEXT:    movb %ah, (%edx)
+; X86-32-NEXT:    movzbl %ah, %eax
+; X86-32-NEXT:    incb %al
+; X86-32-NEXT:    movb %al, (%edx)
 ; X86-32-NEXT:    retl
 
 
diff --git a/llvm/test/CodeGen/X86/h-registers-3.ll b/llvm/test/CodeGen/X86/h-registers-3.ll
index 8e14bf3a6ee18..f041ea887e3aa 100644
--- a/llvm/test/CodeGen/X86/h-registers-3.ll
+++ b/llvm/test/CodeGen/X86/h-registers-3.ll
@@ -8,7 +8,9 @@ define zeroext i8 @foo() nounwind ssp {
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    calll bar at PLT
-; X86-NEXT:    movb %ah, %al
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
 ;
@@ -17,7 +19,7 @@ define zeroext i8 @foo() nounwind ssp {
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    callq bar at PLT
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    shrl $8, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    popq %rcx
@@ -28,7 +30,7 @@ define zeroext i8 @foo() nounwind ssp {
 ; X32-NEXT:    pushq %rax
 ; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    callq bar at PLT
-; X32-NEXT:    # kill: def $ax killed $ax def $eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl $8, %eax
 ; X32-NEXT:    # kill: def $al killed $al killed $eax
 ; X32-NEXT:    popq %rcx
diff --git a/llvm/test/CodeGen/X86/i16lshr8pat.ll b/llvm/test/CodeGen/X86/i16lshr8pat.ll
index 5bb0b9f1a73de..d66beed605657 100644
--- a/llvm/test/CodeGen/X86/i16lshr8pat.ll
+++ b/llvm/test/CodeGen/X86/i16lshr8pat.ll
@@ -14,6 +14,15 @@ target triple = "i386-unknown-linux-gnu"
 ; CHECK:       sub_8bit_hi
 ; CHECK-LABEL: bb.2.endif1:
 
+; FIXME: suboptimal codegen — uses MOVZX32rr16 + SHR32ri instead of a sub_8bit_hi extract; current MIR:
+; %5:gr32 = MOVZX32rr16 %0
+; %6:gr32 = SHR32ri %5, 8, implicit-def dead $eflags
+; %7:gr32_abcd = COPY killed %6
+; %8:gr8 = COPY %7.sub_8bit
+; MOV8mr %2, 1, $noreg, 0, $noreg, killed %8 :: (store (s8) into %ir.dst)
+; %9:gr32 = MOV32r0 implicit-def dead $eflags
+; %4:gr16 = COPY %9.sub_16bit
+
 define i16 @foo4(i32 %prec, ptr%dst, ptr%src) {
 entry:
   %cnd = icmp ne i32 %prec, 0
diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll
index 1866796c7c8c0..d141ad7f48a14 100644
--- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll
+++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -4,8 +4,9 @@
 define i16 @test5(i16 %f12) nounwind {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $6, %edi
-; CHECK-NEXT:    movsbl %dil, %eax
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    shrl $6, %eax
+; CHECK-NEXT:    movsbl %al, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 	%f11 = shl i16 %f12, 2		; <i16> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/isel-shift.ll b/llvm/test/CodeGen/X86/isel-shift.ll
index 476dcf04dbaa2..8c53494cef825 100644
--- a/llvm/test/CodeGen/X86/isel-shift.ll
+++ b/llvm/test/CodeGen/X86/isel-shift.ll
@@ -843,7 +843,7 @@ define i16 @ashr_imm1_i16(i16 %a) {
 ; SDAG-X86-LABEL: ashr_imm1_i16:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; SDAG-X86-NEXT:    shrl %eax
+; SDAG-X86-NEXT:    sarl %eax
 ; SDAG-X86-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SDAG-X86-NEXT:    retl
 ;
@@ -856,7 +856,7 @@ define i16 @ashr_imm1_i16(i16 %a) {
 ; SDAG-X64-LABEL: ashr_imm1_i16:
 ; SDAG-X64:       ## %bb.0:
 ; SDAG-X64-NEXT:    movswl %di, %eax
-; SDAG-X64-NEXT:    shrl %eax
+; SDAG-X64-NEXT:    sarl %eax
 ; SDAG-X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SDAG-X64-NEXT:    retq
 ;
@@ -1134,7 +1134,7 @@ define i16 @ashr_imm4_i16(i16 %a) {
 ; SDAG-X86-LABEL: ashr_imm4_i16:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; SDAG-X86-NEXT:    shrl $4, %eax
+; SDAG-X86-NEXT:    sarl $4, %eax
 ; SDAG-X86-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SDAG-X86-NEXT:    retl
 ;
@@ -1147,7 +1147,7 @@ define i16 @ashr_imm4_i16(i16 %a) {
 ; SDAG-X64-LABEL: ashr_imm4_i16:
 ; SDAG-X64:       ## %bb.0:
 ; SDAG-X64-NEXT:    movswl %di, %eax
-; SDAG-X64-NEXT:    shrl $4, %eax
+; SDAG-X64-NEXT:    sarl $4, %eax
 ; SDAG-X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; SDAG-X64-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/known-signbits-shl.ll b/llvm/test/CodeGen/X86/known-signbits-shl.ll
index 57d557dec11b9..8a0750fdb3bd2 100644
--- a/llvm/test/CodeGen/X86/known-signbits-shl.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-shl.ll
@@ -48,7 +48,7 @@ define void @computeNumSignBits_shl_zext_2(i8 %x, ptr %p) nounwind {
 ; X64-NEXT:    addl $32767, %edx # imm = 0x7FFF
 ; X64-NEXT:    shll $14, %eax
 ; X64-NEXT:    movswl %ax, %edi
-; X64-NEXT:    shrl $4, %edi
+; X64-NEXT:    sarl $4, %edi
 ; X64-NEXT:    cmpw %di, %cx
 ; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    movw %ax, (%rsi)
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
index d2359ced3e19d..8efc29bff973e 100644
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -315,15 +315,15 @@ define <8 x i16> @ashr_op1_constant(ptr %p) nounwind {
 ; SSE-LABEL: ashr_op1_constant:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movswl (%rdi), %eax
+; SSE-NEXT:    sarl $7, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    psrad $7, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ashr_op1_constant:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movswl (%rdi), %eax
+; AVX-NEXT:    sarl $7, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpsrad $7, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %x = load i16, ptr %p
   %b = ashr i16 %x, 7
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index 3187bf6448690..8853bc5f1f6a5 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -3621,7 +3621,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je LBB12_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store13
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_12: ## %else14
 ; SSE2-NEXT:    testb $64, %al
@@ -3634,7 +3635,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns LBB12_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store19
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_16: ## %else20
 ; SSE2-NEXT:    testl $256, %eax ## imm = 0x100
@@ -3647,7 +3649,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testl $512, %eax ## imm = 0x200
 ; SSE2-NEXT:    je LBB12_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store25
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_20: ## %else26
 ; SSE2-NEXT:    testl $1024, %eax ## imm = 0x400
@@ -3660,7 +3663,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testl $2048, %eax ## imm = 0x800
 ; SSE2-NEXT:    je LBB12_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store31
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_24: ## %else32
 ; SSE2-NEXT:    testl $4096, %eax ## imm = 0x1000
@@ -3673,7 +3677,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testl $8192, %eax ## imm = 0x2000
 ; SSE2-NEXT:    je LBB12_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store37
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_28: ## %else38
 ; SSE2-NEXT:    testl $16384, %eax ## imm = 0x4000
@@ -3708,7 +3713,8 @@ define void @compressstore_v16i8_v16i8(ptr %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2-NEXT:    testl $32768, %eax ## imm = 0x8000
 ; SSE2-NEXT:    je LBB12_32
 ; SSE2-NEXT:  LBB12_31: ## %cond.store43
-; SSE2-NEXT:    movb %ch, (%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: compressstore_v16i8_v16i8:
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index c7320275091c6..ed77c41a20b7b 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -2707,7 +2707,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je LBB15_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  LBB15_12: ## %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm1, %ecx
@@ -2718,7 +2719,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns LBB15_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  LBB15_16: ## %else14
 ; SSE2-NEXT:    testl $256, %eax ## imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm1, %ecx
@@ -2729,7 +2731,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testl $512, %eax ## imm = 0x200
 ; SSE2-NEXT:    je LBB15_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  LBB15_20: ## %else18
 ; SSE2-NEXT:    testl $1024, %eax ## imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm1, %ecx
@@ -2740,7 +2743,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testl $2048, %eax ## imm = 0x800
 ; SSE2-NEXT:    je LBB15_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  LBB15_24: ## %else22
 ; SSE2-NEXT:    testl $4096, %eax ## imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm1, %ecx
@@ -2751,7 +2755,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testl $8192, %eax ## imm = 0x2000
 ; SSE2-NEXT:    je LBB15_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  LBB15_28: ## %else26
 ; SSE2-NEXT:    testl $16384, %eax ## imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm1, %ecx
@@ -2781,7 +2786,8 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, ptr %addr, <16 x i8> %val) no
 ; SSE2-NEXT:    testl $32768, %eax ## imm = 0x8000
 ; SSE2-NEXT:    je LBB15_32
 ; SSE2-NEXT:  LBB15_31: ## %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: store_v16i8_v16i8:
@@ -3308,7 +3314,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je LBB16_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  LBB16_12: ## %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
@@ -3319,7 +3326,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns LBB16_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  LBB16_16: ## %else14
 ; SSE2-NEXT:    testl $256, %eax ## imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
@@ -3330,7 +3338,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $512, %eax ## imm = 0x200
 ; SSE2-NEXT:    je LBB16_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  LBB16_20: ## %else18
 ; SSE2-NEXT:    testl $1024, %eax ## imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
@@ -3341,7 +3350,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $2048, %eax ## imm = 0x800
 ; SSE2-NEXT:    je LBB16_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  LBB16_24: ## %else22
 ; SSE2-NEXT:    testl $4096, %eax ## imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
@@ -3352,7 +3362,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $8192, %eax ## imm = 0x2000
 ; SSE2-NEXT:    je LBB16_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  LBB16_28: ## %else26
 ; SSE2-NEXT:    testl $16384, %eax ## imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
@@ -3363,7 +3374,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testw %ax, %ax
 ; SSE2-NEXT:    jns LBB16_32
 ; SSE2-NEXT:  ## %bb.31: ## %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:  LBB16_32: ## %else30
 ; SSE2-NEXT:    testl $65536, %eax ## imm = 0x10000
 ; SSE2-NEXT:    movd %xmm3, %ecx
@@ -3390,7 +3402,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $2097152, %eax ## imm = 0x200000
 ; SSE2-NEXT:    je LBB16_44
 ; SSE2-NEXT:  ## %bb.43: ## %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 21(%rdi)
 ; SSE2-NEXT:  LBB16_44: ## %else42
 ; SSE2-NEXT:    testl $4194304, %eax ## imm = 0x400000
 ; SSE2-NEXT:    pextrw $3, %xmm3, %ecx
@@ -3401,7 +3414,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $8388608, %eax ## imm = 0x800000
 ; SSE2-NEXT:    je LBB16_48
 ; SSE2-NEXT:  ## %bb.47: ## %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 23(%rdi)
 ; SSE2-NEXT:  LBB16_48: ## %else46
 ; SSE2-NEXT:    testl $16777216, %eax ## imm = 0x1000000
 ; SSE2-NEXT:    pextrw $4, %xmm3, %ecx
@@ -3412,7 +3426,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $33554432, %eax ## imm = 0x2000000
 ; SSE2-NEXT:    je LBB16_52
 ; SSE2-NEXT:  ## %bb.51: ## %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 25(%rdi)
 ; SSE2-NEXT:  LBB16_52: ## %else50
 ; SSE2-NEXT:    testl $67108864, %eax ## imm = 0x4000000
 ; SSE2-NEXT:    pextrw $5, %xmm3, %ecx
@@ -3423,7 +3438,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $134217728, %eax ## imm = 0x8000000
 ; SSE2-NEXT:    je LBB16_56
 ; SSE2-NEXT:  ## %bb.55: ## %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 27(%rdi)
 ; SSE2-NEXT:  LBB16_56: ## %else54
 ; SSE2-NEXT:    testl $268435456, %eax ## imm = 0x10000000
 ; SSE2-NEXT:    pextrw $6, %xmm3, %ecx
@@ -3434,7 +3450,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $536870912, %eax ## imm = 0x20000000
 ; SSE2-NEXT:    je LBB16_60
 ; SSE2-NEXT:  ## %bb.59: ## %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 29(%rdi)
 ; SSE2-NEXT:  LBB16_60: ## %else58
 ; SSE2-NEXT:    testl $1073741824, %eax ## imm = 0x40000000
 ; SSE2-NEXT:    pextrw $7, %xmm3, %ecx
@@ -3479,7 +3496,8 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, ptr %addr, <32 x i8> %val) no
 ; SSE2-NEXT:    testl $-2147483648, %eax ## imm = 0x80000000
 ; SSE2-NEXT:    je LBB16_64
 ; SSE2-NEXT:  LBB16_63: ## %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 31(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: store_v32i8_v32i8:
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index ecf4fbb603a8f..e8be0ea93c65f 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -693,7 +693,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB2_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB2_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -723,7 +724,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB2_16
 ; SSE2-NEXT:  .LBB2_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i64_v8i8:
@@ -2975,7 +2977,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -2986,7 +2989,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -2997,7 +3001,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -3008,7 +3013,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -3019,7 +3025,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -3049,7 +3056,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  .LBB10_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i32_v16i8:
@@ -4196,7 +4204,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB12_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB12_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -4226,7 +4235,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB12_16
 ; SSE2-NEXT:  .LBB12_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i32_v8i8:
@@ -5054,7 +5064,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -5065,7 +5076,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -5076,7 +5088,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -5087,7 +5100,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -5098,7 +5112,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
 ; SSE2-NEXT:    pand %xmm6, %xmm3
 ; SSE2-NEXT:    pand %xmm6, %xmm2
@@ -5112,7 +5127,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testw %ax, %ax
 ; SSE2-NEXT:    jns .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
 ; SSE2-NEXT:    testl $65536, %eax # imm = 0x10000
 ; SSE2-NEXT:    movd %xmm2, %ecx
@@ -5139,7 +5155,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2097152, %eax # imm = 0x200000
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    testl $4194304, %eax # imm = 0x400000
 ; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
@@ -5150,7 +5167,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8388608, %eax # imm = 0x800000
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    testl $16777216, %eax # imm = 0x1000000
 ; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
@@ -5161,7 +5179,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $33554432, %eax # imm = 0x2000000
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    testl $67108864, %eax # imm = 0x4000000
 ; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
@@ -5172,7 +5191,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $134217728, %eax # imm = 0x8000000
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    testl $268435456, %eax # imm = 0x10000000
 ; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
@@ -5183,7 +5203,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $536870912, %eax # imm = 0x20000000
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    testl $1073741824, %eax # imm = 0x40000000
 ; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
@@ -5228,7 +5249,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  .LBB15_63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 31(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v32i16_v32i8:
@@ -6490,7 +6512,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -6501,7 +6524,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -6512,7 +6536,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -6523,7 +6548,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -6534,7 +6560,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -6564,7 +6591,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  .LBB16_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i16_v16i8:
@@ -7242,7 +7270,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB17_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB17_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -7272,7 +7301,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB17_16
 ; SSE2-NEXT:  .LBB17_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i16_v8i8:
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index 57b0577ac7cc9..15dfd44ad749c 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -1127,7 +1127,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB2_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB2_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -1157,7 +1158,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB2_16
 ; SSE2-NEXT:  .LBB2_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i64_v8i8:
@@ -3878,7 +3880,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -3889,7 +3892,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -3900,7 +3904,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -3911,7 +3916,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -3922,7 +3928,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -3952,7 +3959,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  .LBB10_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i32_v16i8:
@@ -5088,7 +5096,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB12_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB12_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -5118,7 +5127,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB12_16
 ; SSE2-NEXT:  .LBB12_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i32_v8i8:
@@ -5952,7 +5962,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -5963,7 +5974,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -5974,7 +5986,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -5985,7 +5998,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -5996,7 +6010,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -6008,7 +6023,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testw %ax, %ax
 ; SSE2-NEXT:    jns .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
 ; SSE2-NEXT:    testl $65536, %eax # imm = 0x10000
 ; SSE2-NEXT:    movd %xmm2, %ecx
@@ -6035,7 +6051,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2097152, %eax # imm = 0x200000
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    testl $4194304, %eax # imm = 0x400000
 ; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
@@ -6046,7 +6063,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8388608, %eax # imm = 0x800000
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    testl $16777216, %eax # imm = 0x1000000
 ; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
@@ -6057,7 +6075,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $33554432, %eax # imm = 0x2000000
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    testl $67108864, %eax # imm = 0x4000000
 ; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
@@ -6068,7 +6087,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $134217728, %eax # imm = 0x8000000
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    testl $268435456, %eax # imm = 0x10000000
 ; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
@@ -6079,7 +6099,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $536870912, %eax # imm = 0x20000000
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    testl $1073741824, %eax # imm = 0x40000000
 ; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
@@ -6124,7 +6145,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  .LBB15_63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 31(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v32i16_v32i8:
@@ -7374,7 +7396,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -7385,7 +7408,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -7396,7 +7420,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -7407,7 +7432,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -7418,7 +7444,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -7448,7 +7475,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  .LBB16_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i16_v16i8:
@@ -8124,7 +8152,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB17_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB17_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -8154,7 +8183,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB17_16
 ; SSE2-NEXT:  .LBB17_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i16_v8i8:
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index 0386d9531723d..4b0326d4c66e2 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -932,7 +932,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB2_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB2_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm1, %ecx
@@ -962,7 +963,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB2_16
 ; SSE2-NEXT:  .LBB2_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i64_v8i8:
@@ -3552,7 +3554,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm1, %ecx
@@ -3563,7 +3566,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm1, %ecx
@@ -3574,7 +3578,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm1, %ecx
@@ -3585,7 +3590,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm1, %ecx
@@ -3596,7 +3602,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm1, %ecx
@@ -3626,7 +3633,8 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  .LBB10_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i32_v16i8:
@@ -4812,7 +4820,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB12_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB12_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm4, %ecx
@@ -4842,7 +4851,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB12_16
 ; SSE2-NEXT:  .LBB12_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i32_v8i8:
@@ -5781,7 +5791,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -5792,7 +5803,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -5803,7 +5815,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -5814,7 +5827,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -5829,7 +5843,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
 ; SSE2-NEXT:    psubw %xmm1, %xmm3
 ; SSE2-NEXT:    psubw %xmm4, %xmm2
@@ -5843,7 +5858,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testw %ax, %ax
 ; SSE2-NEXT:    jns .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
 ; SSE2-NEXT:    testl $65536, %eax # imm = 0x10000
 ; SSE2-NEXT:    movd %xmm2, %ecx
@@ -5870,7 +5886,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $2097152, %eax # imm = 0x200000
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    testl $4194304, %eax # imm = 0x400000
 ; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
@@ -5881,7 +5898,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $8388608, %eax # imm = 0x800000
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    testl $16777216, %eax # imm = 0x1000000
 ; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
@@ -5892,7 +5910,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $33554432, %eax # imm = 0x2000000
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    testl $67108864, %eax # imm = 0x4000000
 ; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
@@ -5903,7 +5922,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $134217728, %eax # imm = 0x8000000
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    testl $268435456, %eax # imm = 0x10000000
 ; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
@@ -5914,7 +5934,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $536870912, %eax # imm = 0x20000000
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    testl $1073741824, %eax # imm = 0x40000000
 ; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
@@ -5959,7 +5980,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
 ; SSE2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  .LBB15_63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 31(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v32i16_v32i8:
@@ -7230,7 +7252,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -7241,7 +7264,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testb %al, %al
 ; SSE2-NEXT:    jns .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    testl $256, %eax # imm = 0x100
 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
@@ -7252,7 +7276,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $512, %eax # imm = 0x200
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    testl $1024, %eax # imm = 0x400
 ; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
@@ -7263,7 +7288,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $2048, %eax # imm = 0x800
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    testl $4096, %eax # imm = 0x1000
 ; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
@@ -7274,7 +7300,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $8192, %eax # imm = 0x2000
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    testl $16384, %eax # imm = 0x4000
 ; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
@@ -7304,7 +7331,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
 ; SSE2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  .LBB16_31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 15(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v16i16_v16i8:
@@ -7990,7 +8018,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $32, %al
 ; SSE2-NEXT:    je .LBB17_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 5(%rdi)
 ; SSE2-NEXT:  .LBB17_12: # %else10
 ; SSE2-NEXT:    testb $64, %al
 ; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
@@ -8020,7 +8049,8 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
 ; SSE2-NEXT:    testb $-128, %al
 ; SSE2-NEXT:    je .LBB17_16
 ; SSE2-NEXT:  .LBB17_15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    movb %cl, 7(%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: truncstore_v8i16_v8i8:
diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll
index f9a2411465141..2893f84e624cc 100644
--- a/llvm/test/CodeGen/X86/parity-vec.ll
+++ b/llvm/test/CodeGen/X86/parity-vec.ll
@@ -54,18 +54,18 @@ define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) {
 ; NOPOPCNT-NEXT:    shrl %ecx
 ; NOPOPCNT-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; NOPOPCNT-NEXT:    subl %ecx, %eax
-; NOPOPCNT-NEXT:    movl %eax, %ecx
-; NOPOPCNT-NEXT:    andl $13107, %ecx # imm = 0x3333
-; NOPOPCNT-NEXT:    shrl $2, %eax
+; NOPOPCNT-NEXT:    movzwl %ax, %ecx
 ; NOPOPCNT-NEXT:    andl $13107, %eax # imm = 0x3333
-; NOPOPCNT-NEXT:    addl %ecx, %eax
-; NOPOPCNT-NEXT:    movl %eax, %ecx
-; NOPOPCNT-NEXT:    shrl $4, %ecx
+; NOPOPCNT-NEXT:    shrl $2, %ecx
+; NOPOPCNT-NEXT:    andl $13107, %ecx # imm = 0x3333
 ; NOPOPCNT-NEXT:    addl %eax, %ecx
-; NOPOPCNT-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; NOPOPCNT-NEXT:    movl %ecx, %eax
+; NOPOPCNT-NEXT:    movl %ecx, %edx
+; NOPOPCNT-NEXT:    shrl $4, %edx
+; NOPOPCNT-NEXT:    addl %ecx, %edx
+; NOPOPCNT-NEXT:    andl $3855, %edx # imm = 0xF0F
+; NOPOPCNT-NEXT:    movl %edx, %eax
 ; NOPOPCNT-NEXT:    shrl $8, %eax
-; NOPOPCNT-NEXT:    addl %ecx, %eax
+; NOPOPCNT-NEXT:    addl %edx, %eax
 ; NOPOPCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; NOPOPCNT-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 3004b8b72fcc5..e4dfd665799eb 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -55,41 +55,41 @@ define i16 @cnt16(i16 %x) nounwind readnone {
 ; X86-NEXT:    shrl %ecx
 ; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    movzwl %ax, %ecx
 ; X86-NEXT:    andl $13107, %eax # imm = 0x3333
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
 ; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
 ; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    shrl $4, %eax
 ; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl $8, %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-BASE-LABEL: cnt16:
 ; X64-BASE:       # %bb.0:
-; X64-BASE-NEXT:    movl %edi, %eax
+; X64-BASE-NEXT:    movzwl %di, %eax
 ; X64-BASE-NEXT:    shrl %eax
 ; X64-BASE-NEXT:    andl $21845, %eax # imm = 0x5555
 ; X64-BASE-NEXT:    subl %eax, %edi
-; X64-BASE-NEXT:    movl %edi, %eax
-; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-BASE-NEXT:    shrl $2, %edi
+; X64-BASE-NEXT:    movzwl %di, %eax
 ; X64-BASE-NEXT:    andl $13107, %edi # imm = 0x3333
-; X64-BASE-NEXT:    addl %eax, %edi
-; X64-BASE-NEXT:    movl %edi, %eax
-; X64-BASE-NEXT:    shrl $4, %eax
+; X64-BASE-NEXT:    shrl $2, %eax
+; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
 ; X64-BASE-NEXT:    addl %edi, %eax
-; X64-BASE-NEXT:    andl $3855, %eax # imm = 0xF0F
 ; X64-BASE-NEXT:    movl %eax, %ecx
-; X64-BASE-NEXT:    shrl $8, %ecx
+; X64-BASE-NEXT:    shrl $4, %ecx
 ; X64-BASE-NEXT:    addl %eax, %ecx
-; X64-BASE-NEXT:    movzbl %cl, %eax
+; X64-BASE-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X64-BASE-NEXT:    movl %ecx, %eax
+; X64-BASE-NEXT:    shrl $8, %eax
+; X64-BASE-NEXT:    addl %ecx, %eax
+; X64-BASE-NEXT:    movzbl %al, %eax
 ; X64-BASE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-BASE-NEXT:    retq
 ;
@@ -1828,19 +1828,19 @@ define i32 @popcount_i16_zext(i16 zeroext %x) {
 ; X86-NEXT:    shrl %ecx
 ; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    movzwl %ax, %ecx
 ; X86-NEXT:    andl $13107, %eax # imm = 0x3333
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
 ; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
 ; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    shrl $4, %eax
 ; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrl $8, %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-BASE-LABEL: popcount_i16_zext:
@@ -1849,19 +1849,19 @@ define i32 @popcount_i16_zext(i16 zeroext %x) {
 ; X64-BASE-NEXT:    shrl %eax
 ; X64-BASE-NEXT:    andl $21845, %eax # imm = 0x5555
 ; X64-BASE-NEXT:    subl %eax, %edi
-; X64-BASE-NEXT:    movl %edi, %eax
-; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-BASE-NEXT:    shrl $2, %edi
+; X64-BASE-NEXT:    movzwl %di, %eax
 ; X64-BASE-NEXT:    andl $13107, %edi # imm = 0x3333
-; X64-BASE-NEXT:    addl %eax, %edi
-; X64-BASE-NEXT:    movl %edi, %eax
-; X64-BASE-NEXT:    shrl $4, %eax
+; X64-BASE-NEXT:    shrl $2, %eax
+; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
 ; X64-BASE-NEXT:    addl %edi, %eax
-; X64-BASE-NEXT:    andl $3855, %eax # imm = 0xF0F
 ; X64-BASE-NEXT:    movl %eax, %ecx
-; X64-BASE-NEXT:    shrl $8, %ecx
+; X64-BASE-NEXT:    shrl $4, %ecx
 ; X64-BASE-NEXT:    addl %eax, %ecx
-; X64-BASE-NEXT:    movzbl %cl, %eax
+; X64-BASE-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X64-BASE-NEXT:    movl %ecx, %eax
+; X64-BASE-NEXT:    shrl $8, %eax
+; X64-BASE-NEXT:    addl %ecx, %eax
+; X64-BASE-NEXT:    movzbl %al, %eax
 ; X64-BASE-NEXT:    retq
 ;
 ; X86-POPCNT-LABEL: popcount_i16_zext:
diff --git a/llvm/test/CodeGen/X86/pr172046.ll b/llvm/test/CodeGen/X86/pr172046.ll
index df8c82adb3a59..cbd0cd29baaab 100644
--- a/llvm/test/CodeGen/X86/pr172046.ll
+++ b/llvm/test/CodeGen/X86/pr172046.ll
@@ -5,9 +5,8 @@
 define i32 @_Z1ft(i16 zeroext %0) {
 ; X86-LABEL: _Z1ft:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    movzwl %ax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: _Z1ft:
@@ -24,19 +23,18 @@ entry:
 define i32 @_Z1gt(i16 zeroext %x) {
 ; X86-LABEL: _Z1gt:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $8191, %ecx # imm = 0x1FFF
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (,%eax,8), %ecx
 ; X86-NEXT:    shll $16, %eax
-; X86-NEXT:    leal (%eax,%ecx,8), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: _Z1gt:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    leal (,%rdi,8), %eax
 ; X64-NEXT:    shll $16, %edi
-; X64-NEXT:    leal (%rdi,%rax,8), %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
 entry:
   %conv = zext nneg i16 %x to i32
diff --git a/llvm/test/CodeGen/X86/pr32420.ll b/llvm/test/CodeGen/X86/pr32420.ll
index 52d42520900d8..dea3109d0a2a1 100644
--- a/llvm/test/CodeGen/X86/pr32420.ll
+++ b/llvm/test/CodeGen/X86/pr32420.ll
@@ -15,13 +15,13 @@ define i32 @PR32420() {
 ; CHECK-NEXT:    movl %eax, %edx
 ; CHECK-NEXT:    shll $12, %edx
 ; CHECK-NEXT:    movswl %dx, %edx
-; CHECK-NEXT:    shrl $12, %edx
+; CHECK-NEXT:    sarl $12, %edx
 ; CHECK-NEXT:    movq _b at GOTPCREL(%rip), %rsi
 ; CHECK-NEXT:    orw (%rsi), %dx
 ; CHECK-NEXT:    movl (%rcx), %ecx
 ; CHECK-NEXT:    shll $12, %ecx
 ; CHECK-NEXT:    movswl %cx, %ecx
-; CHECK-NEXT:    shrl $12, %ecx
+; CHECK-NEXT:    sarl $12, %ecx
 ; CHECK-NEXT:    andl %edx, %ecx
 ; CHECK-NEXT:    movw %cx, (%rsi)
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/pr44915.ll b/llvm/test/CodeGen/X86/pr44915.ll
index 99205ab60ae11..1ebdd9ccb3190 100644
--- a/llvm/test/CodeGen/X86/pr44915.ll
+++ b/llvm/test/CodeGen/X86/pr44915.ll
@@ -52,14 +52,15 @@ define i32 @extract3(ptr, i32) nounwind {
 ; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    andl $7, %ecx
 ; X64-NEXT:    movd %ecx, %xmm0
-; X64-NEXT:    movd %eax, %xmm2
-; X64-NEXT:    shrl $3, %eax
-; X64-NEXT:    andl $7, %eax
-; X64-NEXT:    movd %eax, %xmm3
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shrl $3, %ecx
+; X64-NEXT:    andl $7, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    psrld $12, %xmm2
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    shrl $12, %eax
+; X64-NEXT:    movd %eax, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    movdqa %xmm0, -24(%rsp)
 ; X64-NEXT:    andl $7, %esi
 ; X64-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
diff --git a/llvm/test/CodeGen/X86/pr77459.ll b/llvm/test/CodeGen/X86/pr77459.ll
index 9c072e6f5e3fc..20fefdc4aa4be 100644
--- a/llvm/test/CodeGen/X86/pr77459.ll
+++ b/llvm/test/CodeGen/X86/pr77459.ll
@@ -118,17 +118,17 @@ define i16 @reverse_cmp_v16i1(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSE2-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-NEXT:    rolw $8, %ax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; SSE2-NEXT:    shll $4, %ecx
-; SSE2-NEXT:    shrl $4, %eax
+; SSE2-NEXT:    movzwl %ax, %ecx
 ; SSE2-NEXT:    andl $3855, %eax # imm = 0xF0F
-; SSE2-NEXT:    orl %ecx, %eax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $13107, %ecx # imm = 0x3333
-; SSE2-NEXT:    shrl $2, %eax
+; SSE2-NEXT:    shll $4, %eax
+; SSE2-NEXT:    shrl $4, %ecx
+; SSE2-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; SSE2-NEXT:    orl %eax, %ecx
+; SSE2-NEXT:    movl %ecx, %eax
 ; SSE2-NEXT:    andl $13107, %eax # imm = 0x3333
-; SSE2-NEXT:    leal (%rax,%rcx,4), %eax
+; SSE2-NEXT:    shrl $2, %ecx
+; SSE2-NEXT:    andl $13107, %ecx # imm = 0x3333
+; SSE2-NEXT:    leal (%rcx,%rax,4), %eax
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; SSE2-NEXT:    shrl %eax
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
index 26e68861cf45c..00b11e3ffa36a 100644
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -223,26 +223,28 @@ define i16 @no_extract_mul(i16 %i) nounwind {
 define i8 @no_extract_udiv(i8 %i) nounwind {
 ; X86-LABEL: no_extract_udiv:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $171, %eax, %ecx
-; X86-NEXT:    imull $79, %eax, %edx
-; X86-NEXT:    subb %dh, %al
-; X86-NEXT:    shrb %al
-; X86-NEXT:    addb %dh, %al
-; X86-NEXT:    shrb $5, %al
-; X86-NEXT:    shlb $3, %ch
-; X86-NEXT:    orb %al, %ch
-; X86-NEXT:    andb $-9, %ch
-; X86-NEXT:    movb %ch, %al
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    imull $171, %ecx, %eax
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    imull $79, %ecx, %edx
+; X86-NEXT:    shrl $8, %edx
+; X86-NEXT:    subb %dl, %cl
+; X86-NEXT:    shrb %cl
+; X86-NEXT:    addb %dl, %cl
+; X86-NEXT:    shrb $5, %cl
+; X86-NEXT:    shlb $3, %al
+; X86-NEXT:    orb %cl, %al
+; X86-NEXT:    andb $-9, %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: no_extract_udiv:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzbl %dil, %ecx
 ; X64-NEXT:    imull $171, %ecx, %eax
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    movzbl %ah, %eax
 ; X64-NEXT:    imull $79, %ecx, %edx
-; X64-NEXT:    shrl $8, %edx
+; X64-NEXT:    movzbl %dh, %edx
 ; X64-NEXT:    subb %dl, %cl
 ; X64-NEXT:    shrb %cl
 ; X64-NEXT:    addb %dl, %cl
diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 392bc83d9d5d8..5a7b6af321472 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -84,7 +84,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmovel %eax, %edi
 ; X64-NEXT:    addl %edi, %edi
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -111,7 +111,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmovel %eax, %esi
 ; X86-NEXT:    addl %esi, %esi
 ; X86-NEXT:    movswl %si, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
@@ -129,8 +129,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    shll $8, %esi
 ; X64-NEXT:    movswl %si, %ecx
+; X64-NEXT:    sarl %ecx
 ; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    shrl $4, %ecx
+; X64-NEXT:    sarl $3, %ecx
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cwtd
 ; X64-NEXT:    idivw %cx
@@ -147,7 +148,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmovel %eax, %esi
 ; X64-NEXT:    addl %esi, %esi
 ; X64-NEXT:    movswl %si, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -159,8 +160,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movswl %ax, %esi
+; X86-NEXT:    sarl %esi
 ; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    shrl $4, %esi
+; X86-NEXT:    sarl $3, %esi
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    cwtd
 ; X86-NEXT:    idivw %si
@@ -177,7 +179,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmovel %eax, %edi
 ; X86-NEXT:    addl %edi, %edi
 ; X86-NEXT:    movswl %di, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 7df490f984928..7c50d8fbe1049 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -152,8 +152,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    shll $8, %esi
 ; X64-NEXT:    movswl %si, %ecx
+; X64-NEXT:    sarl %ecx
 ; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    shrl $4, %ecx
+; X64-NEXT:    sarl $3, %ecx
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cwtd
 ; X64-NEXT:    idivw %cx
@@ -187,8 +188,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movswl %ax, %esi
+; X86-NEXT:    sarl %esi
 ; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    shrl $4, %esi
+; X86-NEXT:    sarl $3, %esi
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    cwtd
 ; X86-NEXT:    idivw %si
diff --git a/llvm/test/CodeGen/X86/select-smin-smax.ll b/llvm/test/CodeGen/X86/select-smin-smax.ll
index 3c2ec52f2c261..a1b8a98ce9807 100644
--- a/llvm/test/CodeGen/X86/select-smin-smax.ll
+++ b/llvm/test/CodeGen/X86/select-smin-smax.ll
@@ -111,7 +111,7 @@ define i16 @test_i16_smin(i16 %a) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shrl $15, %eax
+; X86-NEXT:    sarl $15, %eax
 ; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/select-sra.ll b/llvm/test/CodeGen/X86/select-sra.ll
index d01d69cd64998..92fde99d3d920 100644
--- a/llvm/test/CodeGen/X86/select-sra.ll
+++ b/llvm/test/CodeGen/X86/select-sra.ll
@@ -115,7 +115,7 @@ define i16 @isneg_i16(i16 %x) {
 ; CHECK-LABEL: isneg_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movswl %di, %eax
-; CHECK-NEXT:    shrl $15, %eax
+; CHECK-NEXT:    sarl $15, %eax
 ; CHECK-NEXT:    andl $542, %eax # imm = 0x21E
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll
index 60ac6df3f77af..1dda82db36f79 100644
--- a/llvm/test/CodeGen/X86/setcc.ll
+++ b/llvm/test/CodeGen/X86/setcc.ll
@@ -287,19 +287,13 @@ define i16 @shift_and(i16 %a) {
 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-NOTBM-LABEL: shift_and:
-; X64-NOTBM:       ## %bb.0:
-; X64-NOTBM-NEXT:    movl %edi, %eax
-; X64-NOTBM-NEXT:    shrl $10, %eax
-; X64-NOTBM-NEXT:    andl $1, %eax
-; X64-NOTBM-NEXT:    ## kill: def $ax killed $ax killed $eax
-; X64-NOTBM-NEXT:    retq
-;
-; X64-TBM-LABEL: shift_and:
-; X64-TBM:       ## %bb.0:
-; X64-TBM-NEXT:    bextrl $266, %edi, %eax ## imm = 0x10A
-; X64-TBM-NEXT:    ## kill: def $ax killed $ax killed $eax
-; X64-TBM-NEXT:    retq
+; X64-LABEL: shift_and:
+; X64:       ## %bb.0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    shrl $10, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
   %and = and i16 %a, 1024
   %cmp = icmp ne i16 %and, 0
   %conv = zext i1 %cmp to i16
@@ -354,3 +348,6 @@ define i64 @pr63055(double %arg) {
   %ext = zext i1 %fcmp to i64
   ret i64 %ext
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64-NOTBM: {{.*}}
+; X64-TBM: {{.*}}
diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
index 604f5c19b92e5..af8625078c501 100644
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -150,9 +150,9 @@ define i16 @test_i16_shl_lshr_2(i16 %a0) {
 ;
 ; X64-MASK-LABEL: test_i16_shl_lshr_2:
 ; X64-MASK:       # %bb.0:
-; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    movzwl %di, %eax
 ; X64-MASK-NEXT:    shrl $2, %eax
-; X64-MASK-NEXT:    andl $16376, %eax # imm = 0x3FF8
+; X64-MASK-NEXT:    andl $-8, %eax
 ; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-MASK-NEXT:    retq
 ;
@@ -419,7 +419,7 @@ define i16 @test_i16_lshr_lshr_1(i16 %a0) {
 ;
 ; X64-MASK-LABEL: test_i16_lshr_lshr_1:
 ; X64-MASK:       # %bb.0:
-; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    movzwl %di, %eax
 ; X64-MASK-NEXT:    shrl $2, %eax
 ; X64-MASK-NEXT:    andl $2047, %eax # imm = 0x7FF
 ; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll
index 509d4443e930a..3dfedc65e34d7 100644
--- a/llvm/test/CodeGen/X86/smax.ll
+++ b/llvm/test/CodeGen/X86/smax.ll
@@ -636,8 +636,8 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    shrl $15, %ecx
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    sarl $15, %ecx
+; X64-NEXT:    sarl $8, %eax
 ; X64-NEXT:    cmpw %ax, %cx
 ; X64-NEXT:    cmovgl %ecx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -647,7 +647,7 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    sarl $15, %ecx
 ; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    cmovgl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll
index 5e9fe27b41d2c..9cdc634a95ce4 100644
--- a/llvm/test/CodeGen/X86/smin.ll
+++ b/llvm/test/CodeGen/X86/smin.ll
@@ -636,8 +636,8 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    shrl $15, %ecx
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    sarl $15, %ecx
+; X64-NEXT:    sarl $8, %eax
 ; X64-NEXT:    cmpw %ax, %cx
 ; X64-NEXT:    cmovll %ecx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -647,7 +647,7 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    sarl $15, %ecx
 ; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    cmovll %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll
index 8cb032776114b..c380c904acf50 100644
--- a/llvm/test/CodeGen/X86/smul_fix.ll
+++ b/llvm/test/CodeGen/X86/smul_fix.ll
@@ -101,12 +101,11 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ; X64-NEXT:    sarb $4, %dil
 ; X64-NEXT:    shlb $4, %sil
 ; X64-NEXT:    sarb $4, %sil
-; X64-NEXT:    movsbl %sil, %ecx
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    imull %ecx, %eax
-; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    imull %eax, %ecx
 ; X64-NEXT:    shrb $2, %cl
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    movzbl %ch, %eax
 ; X64-NEXT:    shlb $6, %al
 ; X64-NEXT:    orb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
@@ -120,12 +119,13 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
-; X86-NEXT:    movsbl %cl, %ecx
-; X86-NEXT:    movsbl %al, %eax
-; X86-NEXT:    imull %ecx, %eax
-; X86-NEXT:    shlb $6, %ah
-; X86-NEXT:    shrb $2, %al
-; X86-NEXT:    orb %ah, %al
+; X86-NEXT:    movsbl %cl, %edx
+; X86-NEXT:    movsbl %al, %ecx
+; X86-NEXT:    imull %edx, %ecx
+; X86-NEXT:    shrb $2, %cl
+; X86-NEXT:    movzbl %ch, %eax
+; X86-NEXT:    shlb $6, %al
+; X86-NEXT:    orb %cl, %al
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
   %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2)
diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll
index e68b6e328b723..3e81908d96f79 100644
--- a/llvm/test/CodeGen/X86/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -142,12 +142,11 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ; X64-NEXT:    shlb $4, %sil
 ; X64-NEXT:    sarb $4, %sil
 ; X64-NEXT:    shlb $4, %dil
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    movsbl %sil, %ecx
-; X64-NEXT:    imull %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:    shrb $2, %al
-; X64-NEXT:    shrl $8, %ecx
+; X64-NEXT:    movzbl %ah, %ecx
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    shlb $6, %dl
 ; X64-NEXT:    orb %al, %dl
@@ -172,15 +171,16 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ; X86-NEXT:    movsbl %cl, %ecx
 ; X86-NEXT:    movsbl %al, %eax
 ; X86-NEXT:    imull %ecx, %eax
-; X86-NEXT:    movb %ah, %cl
-; X86-NEXT:    shlb $6, %cl
 ; X86-NEXT:    shrb $2, %al
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    movzbl %al, %ecx
-; X86-NEXT:    cmpb $2, %ah
+; X86-NEXT:    movzbl %ah, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    shlb $6, %dl
+; X86-NEXT:    orb %al, %dl
+; X86-NEXT:    movzbl %dl, %eax
+; X86-NEXT:    cmpb $2, %cl
 ; X86-NEXT:    movl $127, %edx
-; X86-NEXT:    cmovll %ecx, %edx
-; X86-NEXT:    cmpb $-2, %ah
+; X86-NEXT:    cmovll %eax, %edx
+; X86-NEXT:    cmpb $-2, %cl
 ; X86-NEXT:    movl $128, %eax
 ; X86-NEXT:    cmovgel %edx, %eax
 ; X86-NEXT:    sarb $4, %al
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index 5fd1f77e166d4..57dd4b6bf1793 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -542,7 +542,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    cmpq $.Lslh_ret_addr4, %rdx
 ; X64-NEXT:    cmovneq %rbx, %rcx
 ; X64-NEXT:    movl %ebp, (%rax)
-; X64-NEXT:  .Ltmp0:
+; X64-NEXT:  .Ltmp0: # EH_LABEL
 ; X64-NEXT:    shlq $47, %rcx
 ; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    xorl %esi, %esi
@@ -555,7 +555,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    cmpq $.Lslh_ret_addr5, %rcx
 ; X64-NEXT:    cmovneq %rbx, %rax
-; X64-NEXT:  .Ltmp1:
+; X64-NEXT:  .Ltmp1: # EH_LABEL
 ; X64-NEXT:    jmp .LBB4_3
 ; X64-NEXT:  .LBB4_1:
 ; X64-NEXT:    cmovleq %rbx, %rax
@@ -575,7 +575,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB4_4: # %lpad
 ; X64-NEXT:    .cfi_def_cfa_offset 48
-; X64-NEXT:  .Ltmp2:
+; X64-NEXT:  .Ltmp2: # EH_LABEL
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    sarq $63, %rcx
 ; X64-NEXT:    movl (%rax), %eax
@@ -616,12 +616,12 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-LFENCE-NEXT:    movl $4, %edi
 ; X64-LFENCE-NEXT:    callq __cxa_allocate_exception at PLT
 ; X64-LFENCE-NEXT:    movl %ebp, (%rax)
-; X64-LFENCE-NEXT:  .Ltmp0:
+; X64-LFENCE-NEXT:  .Ltmp0: # EH_LABEL
 ; X64-LFENCE-NEXT:    movq %rax, %rdi
 ; X64-LFENCE-NEXT:    xorl %esi, %esi
 ; X64-LFENCE-NEXT:    xorl %edx, %edx
 ; X64-LFENCE-NEXT:    callq __cxa_throw at PLT
-; X64-LFENCE-NEXT:  .Ltmp1:
+; X64-LFENCE-NEXT:  .Ltmp1: # EH_LABEL
 ; X64-LFENCE-NEXT:  .LBB4_2: # %exit
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    popq %rbx
@@ -633,7 +633,7 @@ define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_harden
 ; X64-LFENCE-NEXT:    retq
 ; X64-LFENCE-NEXT:  .LBB4_3: # %lpad
 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
-; X64-LFENCE-NEXT:  .Ltmp2:
+; X64-LFENCE-NEXT:  .Ltmp2: # EH_LABEL
 ; X64-LFENCE-NEXT:    movl (%rax), %eax
 ; X64-LFENCE-NEXT:    addl (%r14), %eax
 ; X64-LFENCE-NEXT:    cltq
@@ -1049,8 +1049,8 @@ define void @test_deferred_hardening(ptr %ptr1, ptr %ptr2, i32 %x) nounwind spec
 ; X64-NEXT:    cmpq $.Lslh_ret_addr23, %rcx
 ; X64-NEXT:    cmovneq %r15, %rax
 ; X64-NEXT:    movswl (%rbx), %edi
+; X64-NEXT:    sarl $7, %edi
 ; X64-NEXT:    notl %edi
-; X64-NEXT:    shrl $7, %edi
 ; X64-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-NEXT:    orl %eax, %edi
 ; X64-NEXT:    shlq $47, %rax
@@ -1102,8 +1102,8 @@ define void @test_deferred_hardening(ptr %ptr1, ptr %ptr2, i32 %x) nounwind spec
 ; X64-LFENCE-NEXT:    shll $7, %edi
 ; X64-LFENCE-NEXT:    callq sink at PLT
 ; X64-LFENCE-NEXT:    movswl (%rbx), %edi
+; X64-LFENCE-NEXT:    sarl $7, %edi
 ; X64-LFENCE-NEXT:    notl %edi
-; X64-LFENCE-NEXT:    shrl $7, %edi
 ; X64-LFENCE-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-LFENCE-NEXT:    callq sink at PLT
 ; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
diff --git a/llvm/test/CodeGen/X86/sshl_sat.ll b/llvm/test/CodeGen/X86/sshl_sat.ll
index a93be22bf5861..cfdcd4a350a34 100644
--- a/llvm/test/CodeGen/X86/sshl_sat.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat.ll
@@ -70,7 +70,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmpw %di, %dx
 ; X64-NEXT:    cmovnel %esi, %eax
 ; X64-NEXT:    cwtl
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -91,7 +91,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    cmovel %edx, %ecx
 ; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
@@ -120,7 +120,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmpw %dx, %di
 ; X64-NEXT:    cmovel %eax, %ecx
 ; X64-NEXT:    movswl %cx, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -143,7 +143,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    cmovel %edx, %ecx
 ; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 82dfeeee13293..fe992f42e4d2b 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -45,7 +45,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    divl %ecx
 ; X64-NEXT:    addl %eax, %eax
 ; X64-NEXT:    cwtl
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -60,7 +60,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    divl %ecx
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %x2 = sext i8 %x to i15
@@ -83,7 +83,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    addl %eax, %eax
 ; X64-NEXT:    cwtl
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -100,7 +100,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %y2 = sext i8 %y to i15
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 3da5973f9f903..60f7da44967b2 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -57,7 +57,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmovbl %eax, %ecx
 ; X64-NEXT:    addl %ecx, %ecx
 ; X64-NEXT:    movswl %cx, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -75,7 +75,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmovbl %eax, %ecx
 ; X86-NEXT:    addl %ecx, %ecx
 ; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %x2 = sext i8 %x to i15
@@ -102,7 +102,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    cmovbl %eax, %ecx
 ; X64-NEXT:    addl %ecx, %ecx
 ; X64-NEXT:    movswl %cx, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -123,7 +123,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    cmovbl %eax, %ecx
 ; X86-NEXT:    addl %ecx, %ecx
 ; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %y2 = sext i8 %y to i15
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index 7ef859978cdbf..7444fcca4c7d2 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -1234,8 +1234,8 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    shrl $15, %ecx
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    sarl $15, %ecx
+; X64-NEXT:    sarl $8, %eax
 ; X64-NEXT:    cmpw %ax, %cx
 ; X64-NEXT:    cmoval %ecx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1245,7 +1245,7 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    sarl $15, %ecx
 ; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    cmoval %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index c927abf3a4263..dedbebca52a13 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -645,8 +645,8 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    shrl $15, %ecx
-; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    sarl $15, %ecx
+; X64-NEXT:    sarl $8, %eax
 ; X64-NEXT:    cmpw %ax, %cx
 ; X64-NEXT:    cmovbl %ecx, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -656,7 +656,7 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    sarl $15, %ecx
 ; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    cmovbl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..a9fc7e74744c2 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -120,12 +120,14 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    imull %edx, %eax
-; X86-NEXT:    movb %ah, %cl
-; X86-NEXT:    shlb $6, %cl
-; X86-NEXT:    shrb $2, %al
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    movzbl %al, %ecx
-; X86-NEXT:    cmpb $4, %ah
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrb $2, %cl
+; X86-NEXT:    shrl $8, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb $6, %dl
+; X86-NEXT:    orb %cl, %dl
+; X86-NEXT:    movzbl %dl, %ecx
+; X86-NEXT:    cmpb $4, %al
 ; X86-NEXT:    movl $255, %eax
 ; X86-NEXT:    cmovbl %ecx, %eax
 ; X86-NEXT:    shrb $4, %al
diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll
index 9768e4761f47a..59eace48a7300 100644
--- a/llvm/test/CodeGen/X86/ushl_sat.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat.ll
@@ -59,7 +59,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X64-NEXT:    movl $65535, %ecx # imm = 0xFFFF
 ; X64-NEXT:    cmovel %eax, %ecx
 ; X64-NEXT:    movswl %cx, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -77,7 +77,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovel %edx, %eax
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
@@ -103,7 +103,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X64-NEXT:    movl $65535, %ecx # imm = 0xFFFF
 ; X64-NEXT:    cmovel %eax, %ecx
 ; X64-NEXT:    movswl %cx, %eax
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    sarl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
@@ -123,7 +123,7 @@ define i16 @func3(i15 %x, i8 %y) nounwind {
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovel %edx, %eax
 ; X86-NEXT:    cwtl
-; X86-NEXT:    shrl %eax
+; X86-NEXT:    sarl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 834dfd63432b0..047d3b348810a 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -83,19 +83,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ; SSE-LABEL: test_bitreverse_i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
 ; SSE-NEXT:    rolw $8, %di
-; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    andl $3855, %eax # imm = 0xF0F
-; SSE-NEXT:    shll $4, %eax
-; SSE-NEXT:    shrl $4, %edi
+; SSE-NEXT:    movzwl %di, %eax
 ; SSE-NEXT:    andl $3855, %edi # imm = 0xF0F
-; SSE-NEXT:    orl %eax, %edi
-; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    shll $4, %edi
+; SSE-NEXT:    shrl $4, %eax
+; SSE-NEXT:    andl $3855, %eax # imm = 0xF0F
+; SSE-NEXT:    orl %edi, %eax
+; SSE-NEXT:    movl %eax, %ecx
+; SSE-NEXT:    andl $13107, %ecx # imm = 0x3333
+; SSE-NEXT:    shrl $2, %eax
 ; SSE-NEXT:    andl $13107, %eax # imm = 0x3333
-; SSE-NEXT:    shrl $2, %edi
-; SSE-NEXT:    andl $13107, %edi # imm = 0x3333
-; SSE-NEXT:    leal (%rdi,%rax,4), %eax
+; SSE-NEXT:    leal (%rax,%rcx,4), %eax
 ; SSE-NEXT:    movl %eax, %ecx
 ; SSE-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; SSE-NEXT:    shrl %eax
@@ -106,19 +105,18 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; AVX-LABEL: test_bitreverse_i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
 ; AVX-NEXT:    rolw $8, %di
-; AVX-NEXT:    movl %edi, %eax
-; AVX-NEXT:    andl $3855, %eax # imm = 0xF0F
-; AVX-NEXT:    shll $4, %eax
-; AVX-NEXT:    shrl $4, %edi
+; AVX-NEXT:    movzwl %di, %eax
 ; AVX-NEXT:    andl $3855, %edi # imm = 0xF0F
-; AVX-NEXT:    orl %eax, %edi
-; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    shll $4, %edi
+; AVX-NEXT:    shrl $4, %eax
+; AVX-NEXT:    andl $3855, %eax # imm = 0xF0F
+; AVX-NEXT:    orl %edi, %eax
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    andl $13107, %ecx # imm = 0x3333
+; AVX-NEXT:    shrl $2, %eax
 ; AVX-NEXT:    andl $13107, %eax # imm = 0x3333
-; AVX-NEXT:    shrl $2, %edi
-; AVX-NEXT:    andl $13107, %edi # imm = 0x3333
-; AVX-NEXT:    leal (%rdi,%rax,4), %eax
+; AVX-NEXT:    leal (%rax,%rcx,4), %eax
 ; AVX-NEXT:    movl %eax, %ecx
 ; AVX-NEXT:    andl $21845, %ecx # imm = 0x5555
 ; AVX-NEXT:    shrl %eax
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index d5a724139ffd3..83ce464603cf5 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -5727,31 +5727,29 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
 ; AVX1-NEXT:    pushq %rbx
 ; AVX1-NEXT:    movq (%rdi), %rax
 ; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    movq %rax, %r9
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    movl %eax, %r11d
-; AVX1-NEXT:    movl %eax, %ebx
+; AVX1-NEXT:    movzbl %ah, %ebx
 ; AVX1-NEXT:    vmovd %eax, %xmm0
-; AVX1-NEXT:    shrl $8, %eax
-; AVX1-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $1, %ebx, %xmm0, %xmm0
+; AVX1-NEXT:    movl %eax, %ebx
 ; AVX1-NEXT:    shrl $16, %ebx
 ; AVX1-NEXT:    vpinsrb $2, %ebx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $24, %r11d
-; AVX1-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $32, %r10
-; AVX1-NEXT:    vpinsrb $4, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $40, %r9
-; AVX1-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $48, %r8
-; AVX1-NEXT:    vpinsrb $6, %r8d, %xmm0, %xmm0
+; AVX1-NEXT:    movl %eax, %ebx
+; AVX1-NEXT:    shrl $24, %ebx
+; AVX1-NEXT:    vpinsrb $3, %ebx, %xmm0, %xmm0
+; AVX1-NEXT:    movq %rax, %rbx
+; AVX1-NEXT:    movq %rax, %r8
 ; AVX1-NEXT:    movq 8(%rdi), %rax
-; AVX1-NEXT:    shrq $56, %rcx
-; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $8, %ecx
+; AVX1-NEXT:    shrq $32, %rbx
+; AVX1-NEXT:    vpinsrb $4, %ebx, %xmm0, %xmm0
+; AVX1-NEXT:    movzbl %bh, %edi
+; AVX1-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
+; AVX1-NEXT:    movzbl %ah, %edi
+; AVX1-NEXT:    shrq $56, %r8
+; AVX1-NEXT:    shrq $48, %rcx
+; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $7, %r8d, %xmm0, %xmm0
 ; AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $9, %edi, %xmm0, %xmm0
 ; AVX1-NEXT:    movl %eax, %ecx
 ; AVX1-NEXT:    shrl $16, %ecx
 ; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
@@ -5759,16 +5757,15 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
 ; AVX1-NEXT:    shrl $24, %ecx
 ; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
 ; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $32, %rcx
-; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $40, %rcx
-; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    movq %rax, %rdi
+; AVX1-NEXT:    shrq $32, %rax
+; AVX1-NEXT:    movzbl %ah, %ebx
+; AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $13, %ebx, %xmm0, %xmm0
 ; AVX1-NEXT:    shrq $48, %rcx
 ; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrq $56, %rax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    shrq $56, %rdi
+; AVX1-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb 32(%rsi), %xmm0, %xmm1
 ; AVX1-NEXT:    vpaddb (%rsi), %xmm0, %xmm0
 ; AVX1-NEXT:    vmovaps 16(%rsi), %xmm2
@@ -5785,31 +5782,29 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
 ; AVX2-NEXT:    pushq %rbx
 ; AVX2-NEXT:    movq (%rdi), %rax
 ; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    movq %rax, %r9
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    movl %eax, %r11d
-; AVX2-NEXT:    movl %eax, %ebx
+; AVX2-NEXT:    movzbl %ah, %ebx
 ; AVX2-NEXT:    vmovd %eax, %xmm0
-; AVX2-NEXT:    shrl $8, %eax
-; AVX2-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $1, %ebx, %xmm0, %xmm0
+; AVX2-NEXT:    movl %eax, %ebx
 ; AVX2-NEXT:    shrl $16, %ebx
 ; AVX2-NEXT:    vpinsrb $2, %ebx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $24, %r11d
-; AVX2-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $32, %r10
-; AVX2-NEXT:    vpinsrb $4, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $40, %r9
-; AVX2-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $48, %r8
-; AVX2-NEXT:    vpinsrb $6, %r8d, %xmm0, %xmm0
+; AVX2-NEXT:    movl %eax, %ebx
+; AVX2-NEXT:    shrl $24, %ebx
+; AVX2-NEXT:    vpinsrb $3, %ebx, %xmm0, %xmm0
+; AVX2-NEXT:    movq %rax, %rbx
+; AVX2-NEXT:    movq %rax, %r8
 ; AVX2-NEXT:    movq 8(%rdi), %rax
-; AVX2-NEXT:    shrq $56, %rcx
-; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $8, %ecx
+; AVX2-NEXT:    shrq $32, %rbx
+; AVX2-NEXT:    vpinsrb $4, %ebx, %xmm0, %xmm0
+; AVX2-NEXT:    movzbl %bh, %edi
+; AVX2-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
+; AVX2-NEXT:    movzbl %ah, %edi
+; AVX2-NEXT:    shrq $56, %r8
+; AVX2-NEXT:    shrq $48, %rcx
+; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $7, %r8d, %xmm0, %xmm0
 ; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $9, %edi, %xmm0, %xmm0
 ; AVX2-NEXT:    movl %eax, %ecx
 ; AVX2-NEXT:    shrl $16, %ecx
 ; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
@@ -5817,16 +5812,15 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
 ; AVX2-NEXT:    shrl $24, %ecx
 ; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $32, %rcx
-; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $40, %rcx
-; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    movq %rax, %rdi
+; AVX2-NEXT:    shrq $32, %rax
+; AVX2-NEXT:    movzbl %ah, %ebx
+; AVX2-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $13, %ebx, %xmm0, %xmm0
 ; AVX2-NEXT:    shrq $48, %rcx
 ; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrq $56, %rax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    shrq $56, %rdi
+; AVX2-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
 ; AVX2-NEXT:    vpaddb 32(%rsi), %ymm0, %ymm1
 ; AVX2-NEXT:    vpaddb (%rsi), %ymm0, %ymm0
 ; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)



More information about the llvm-commits mailing list