[llvm] [X86] Improve transform for add-like nodes to `add` (PR #83691)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 00:30:04 PST 2024


https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/83691

>From 9fdab97343088e07a60459e706a0bc1d112fe274 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 2 Mar 2024 14:47:08 -0600
Subject: [PATCH] [X86] Improve transform for add-like nodes to `add`

Remove bespoke logic and use `isADDLike`.
---
 llvm/lib/Target/X86/X86InstrCompiler.td       | 33 +++++++--
 llvm/lib/Target/X86/X86InstrFragments.td      |  7 +-
 .../AMDGPU/GlobalISel/inst-select-fabs.mir    | 72 +++++++++----------
 .../AMDGPU/GlobalISel/inst-select-fneg.mir    | 72 +++++++++----------
 .../AMDGPU/GlobalISel/inst-select-sext.mir    | 16 ++---
 .../AMDGPU/GlobalISel/inst-select-zext.mir    | 10 ++-
 llvm/test/CodeGen/X86/bitselect.ll            |  9 +--
 llvm/test/CodeGen/X86/fold-masked-merge.ll    | 38 +++++-----
 ...unfold-masked-merge-scalar-variablemask.ll |  9 +--
 ...unfold-masked-merge-vector-variablemask.ll | 39 +++++-----
 10 files changed, 155 insertions(+), 150 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f393f86e64aadd..ca36afd3b403e0 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1560,21 +1560,40 @@ let Predicates = [HasNDD] in {
 }
 
 // Depositing value to 8/16 bit subreg:
-def : Pat<(or (and GR64:$dst, -256), 
+def : Pat<(or (and GR64:$dst, -256),
               (i64 (zextloadi8 addr:$src))),
-          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>; 
+          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
 
-def : Pat<(or (and GR32:$dst, -256), 
+def : Pat<(or (and GR32:$dst, -256),
               (i32 (zextloadi8 addr:$src))),
-          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>; 
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
 
-def : Pat<(or (and GR64:$dst, -65536), 
+def : Pat<(or (and GR64:$dst, -65536),
               (i64 (zextloadi16 addr:$src))),
           (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
 
-def : Pat<(or (and GR32:$dst, -65536), 
+def : Pat<(or (and GR32:$dst, -65536),
               (i32 (zextloadi16 addr:$src))),
-          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>; 
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
+
+// Same pattern as above but supporting `add` as the join
+// operator. Need to support `add` as well, as we can convert `or` ->
+// `add` when the `or` is `disjoint` (as in this patterns case).
+def : Pat<(add (and GR64:$dst, -256),
+               (i64 (zextloadi8 addr:$src))),
+          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR32:$dst, -256),
+               (i32 (zextloadi8 addr:$src))),
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR64:$dst, -65536),
+               (i64 (zextloadi16 addr:$src))),
+          (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
+
+def : Pat<(add (and GR32:$dst, -65536),
+               (i32 (zextloadi16 addr:$src))),
+          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
 
 // To avoid needing to materialize an immediate in a register, use a 32-bit and
 // with implicit zero-extension instead of a 64-bit and if the immediate has at
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index adf527d72f5b43..898639401bafc5 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -676,12 +676,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
 
 // Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
 def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
-  KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
-  return (~Known0.Zero & ~Known1.Zero) == 0;
+  return N->getOpcode() == ISD::OR && CurDAG->isADDLike(SDValue(N, 0));
 }]>;
 
 def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
index ca75fd207607a5..aca51802511b92 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
@@ -426,48 +426,44 @@ body: |
     ; SI: liveins: $sgpr0_sgpr1
     ; SI-NEXT: {{  $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; VI-LABEL: name: fabs_s64_ss
     ; VI: liveins: $sgpr0_sgpr1
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX9-LABEL: name: fabs_s64_ss
     ; GFX9: liveins: $sgpr0_sgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX10-LABEL: name: fabs_s64_ss
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s64) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_FABS %0
@@ -643,48 +639,44 @@ body: |
     ; SI: liveins: $sgpr0_sgpr1
     ; SI-NEXT: {{  $}}
     ; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
     ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
-    ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
-    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; VI-LABEL: name: fabs_s64_ss_no_src_constraint
     ; VI: liveins: $sgpr0_sgpr1
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
     ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
-    ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
-    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint
     ; GFX9: liveins: $sgpr0_sgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
     ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
-    ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
     ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY [[DEF]].sub1
-    ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[DEF]].sub0
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s64) = IMPLICIT_DEF
     %1:sgpr(s64) = G_FABS %0:sgpr(s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
index acda00231ec612..e7a52873225ead 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
@@ -426,48 +426,44 @@ body: |
     ; SI: liveins: $sgpr0_sgpr1
     ; SI-NEXT: {{  $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
-    ; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
     ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; VI-LABEL: name: fneg_s64_ss
     ; VI: liveins: $sgpr0_sgpr1
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
-    ; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
     ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX9-LABEL: name: fneg_s64_ss
     ; GFX9: liveins: $sgpr0_sgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
-    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX10-LABEL: name: fneg_s64_ss
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
-    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s64) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_FNEG %0
@@ -1027,48 +1023,44 @@ body: |
     ; SI: liveins: $sgpr0_sgpr1
     ; SI-NEXT: {{  $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
-    ; SI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
     ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; VI-LABEL: name: fneg_fabs_s64_ss
     ; VI: liveins: $sgpr0_sgpr1
     ; VI-NEXT: {{  $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
-    ; VI-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
     ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX9-LABEL: name: fneg_fabs_s64_ss
     ; GFX9: liveins: $sgpr0_sgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
-    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ;
     ; GFX10-LABEL: name: fneg_fabs_s64_ss
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
     ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1
-    ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
-    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[COPY]].sub0
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s64) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_FABS %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
index 150d9b72afcee5..4a91afc88fa599 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
@@ -104,13 +104,10 @@ body: |
     ; GCN: liveins: $sgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 31
-    ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]]
-    ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[S_SEXT_I32_I16_]], [[S_MOV_B32_]], implicit-def dead $scc
-    ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_ASHR_I32_]]
-    ; GCN-NEXT: [[S_SEXT_I32_I16_1:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = S_SEXT_I32_I16 [[COPY]]
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_SEXT_I32_I16_1]], %subreg.sub0, [[COPY1]], %subreg.sub1
-    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s64) = G_SEXT %1
@@ -130,10 +127,9 @@ body: |
     ; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64
     ; GCN: liveins: $sgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY $sgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], 31, implicit-def dead $scc
-    ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_ASHR_I32_]]
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1
     ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s64) = G_SEXT %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
index 1589172a028265..577a2e7d7d3660 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
@@ -104,12 +104,10 @@ body: |
     ; GCN: liveins: $sgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
-    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
-    ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[S_AND_B32_]]
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
-    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s64) = G_ZEXT %1
diff --git a/llvm/test/CodeGen/X86/bitselect.ll b/llvm/test/CodeGen/X86/bitselect.ll
index 2922113b14ea90..27d77b07c82dac 100644
--- a/llvm/test/CodeGen/X86/bitselect.ll
+++ b/llvm/test/CodeGen/X86/bitselect.ll
@@ -45,11 +45,12 @@ define i16 @bitselect_i16(i16 %a, i16 %b, i16 %m) nounwind {
 ;
 ; X64-NOBMI-LABEL: bitselect_i16:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %edx, %eax
+; X64-NOBMI-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NOBMI-NEXT:    andl %edx, %esi
-; X64-NOBMI-NEXT:    notl %eax
-; X64-NOBMI-NEXT:    andl %edi, %eax
-; X64-NOBMI-NEXT:    orl %esi, %eax
+; X64-NOBMI-NEXT:    notl %edx
+; X64-NOBMI-NEXT:    andl %edi, %edx
+; X64-NOBMI-NEXT:    leal (%rdx,%rsi), %eax
 ; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NOBMI-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge.ll b/llvm/test/CodeGen/X86/fold-masked-merge.ll
index 135494ac25f8cb..3c1b1d59a67e61 100644
--- a/llvm/test/CodeGen/X86/fold-masked-merge.ll
+++ b/llvm/test/CodeGen/X86/fold-masked-merge.ll
@@ -30,11 +30,12 @@ define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
 define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
 ; NOBMI-LABEL: masked_merge1:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movl %edi, %eax
+; NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
 ; NOBMI-NEXT:    andl %edi, %esi
-; NOBMI-NEXT:    notl %eax
-; NOBMI-NEXT:    andl %edx, %eax
-; NOBMI-NEXT:    orl %esi, %eax
+; NOBMI-NEXT:    notl %edi
+; NOBMI-NEXT:    andl %edx, %edi
+; NOBMI-NEXT:    leal (%rdi,%rsi), %eax
 ; NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; NOBMI-NEXT:    retq
 ;
@@ -203,11 +204,12 @@ define i32 @not_a_masked_merge4(i32 %a0, i32 %a1, i32 %a2) {
 define i32 @masked_merge_no_transform0(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
 ; NOBMI-LABEL: masked_merge_no_transform0:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movl %edi, %eax
+; NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
 ; NOBMI-NEXT:    andl %edi, %esi
-; NOBMI-NEXT:    notl %eax
-; NOBMI-NEXT:    andl %edx, %eax
-; NOBMI-NEXT:    orl %esi, %eax
+; NOBMI-NEXT:    notl %edi
+; NOBMI-NEXT:    andl %edx, %edi
+; NOBMI-NEXT:    leal (%rdi,%rsi), %eax
 ; NOBMI-NEXT:    movl %esi, (%rcx)
 ; NOBMI-NEXT:    retq
 ;
@@ -230,11 +232,12 @@ define i32 @masked_merge_no_transform0(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
 define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
 ; NOBMI-LABEL: masked_merge_no_transform1:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movl %edx, %eax
+; NOBMI-NEXT:    # kill: def $edx killed $edx def $rdx
+; NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
 ; NOBMI-NEXT:    andl %edi, %esi
 ; NOBMI-NEXT:    notl %edi
-; NOBMI-NEXT:    andl %edi, %eax
-; NOBMI-NEXT:    orl %esi, %eax
+; NOBMI-NEXT:    andl %edi, %edx
+; NOBMI-NEXT:    leal (%rdx,%rsi), %eax
 ; NOBMI-NEXT:    movl %edi, (%rcx)
 ; NOBMI-NEXT:    retq
 ;
@@ -258,20 +261,21 @@ define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
 define i32 @masked_merge_no_transform2(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
 ; NOBMI-LABEL: masked_merge_no_transform2:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movl %esi, %eax
-; NOBMI-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; NOBMI-NEXT:    andl %edi, %esi
 ; NOBMI-NEXT:    notl %edi
 ; NOBMI-NEXT:    andl %edx, %edi
-; NOBMI-NEXT:    orl %edi, %eax
+; NOBMI-NEXT:    leal (%rsi,%rdi), %eax
 ; NOBMI-NEXT:    movl %edi, (%rcx)
 ; NOBMI-NEXT:    retq
 ;
 ; BMI-LABEL: masked_merge_no_transform2:
 ; BMI:       # %bb.0:
-; BMI-NEXT:    movl %esi, %eax
-; BMI-NEXT:    andl %edi, %eax
+; BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; BMI-NEXT:    andl %edi, %esi
 ; BMI-NEXT:    andnl %edx, %edi, %edx
-; BMI-NEXT:    orl %edx, %eax
+; BMI-NEXT:    leal (%rsi,%rdx), %eax
 ; BMI-NEXT:    movl %edx, (%rcx)
 ; BMI-NEXT:    retq
   %and0 = and i32 %a0, %a1
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
index 9c9d06921096cb..705edc8adc1261 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
@@ -33,11 +33,12 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
 define i16 @out16(i16 %x, i16 %y, i16 %mask) {
 ; CHECK-NOBMI-LABEL: out16:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %eax
-; CHECK-NOBMI-NEXT:    andl %esi, %eax
-; CHECK-NOBMI-NEXT:    orl %edi, %eax
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    andl %esi, %edx
+; CHECK-NOBMI-NEXT:    leal (%rdx,%rdi), %eax
 ; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
index b1194bedc4e1ca..f83406d3c592ce 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
@@ -86,11 +86,12 @@ define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
 define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
 ; CHECK-LABEL: out_v1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    andl %edx, %edi
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    notl %edx
+; CHECK-NEXT:    andl %esi, %edx
+; CHECK-NEXT:    leal (%rdx,%rdi), %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %mx = and <1 x i16> %x, %mask
@@ -235,32 +236,38 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
 ; CHECK-BASELINE-LABEL: out_v2i16:
 ; CHECK-BASELINE:       # %bb.0:
-; CHECK-BASELINE-NEXT:    movl %r8d, %eax
+; CHECK-BASELINE-NEXT:    # kill: def $r9d killed $r9d def $r9
+; CHECK-BASELINE-NEXT:    # kill: def $r8d killed $r8d def $r8
+; CHECK-BASELINE-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-BASELINE-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-BASELINE-NEXT:    andl %r9d, %esi
 ; CHECK-BASELINE-NEXT:    andl %r8d, %edi
-; CHECK-BASELINE-NEXT:    notl %eax
+; CHECK-BASELINE-NEXT:    notl %r8d
 ; CHECK-BASELINE-NEXT:    notl %r9d
 ; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
-; CHECK-BASELINE-NEXT:    orl %esi, %r9d
-; CHECK-BASELINE-NEXT:    andl %edx, %eax
-; CHECK-BASELINE-NEXT:    orl %edi, %eax
+; CHECK-BASELINE-NEXT:    leal (%r9,%rsi), %ecx
+; CHECK-BASELINE-NEXT:    andl %edx, %r8d
+; CHECK-BASELINE-NEXT:    leal (%r8,%rdi), %eax
 ; CHECK-BASELINE-NEXT:    # kill: def $ax killed $ax killed $eax
-; CHECK-BASELINE-NEXT:    movl %r9d, %edx
+; CHECK-BASELINE-NEXT:    movl %ecx, %edx
 ; CHECK-BASELINE-NEXT:    retq
 ;
 ; CHECK-SSE1-LABEL: out_v2i16:
 ; CHECK-SSE1:       # %bb.0:
-; CHECK-SSE1-NEXT:    movl %r8d, %eax
+; CHECK-SSE1-NEXT:    # kill: def $r9d killed $r9d def $r9
+; CHECK-SSE1-NEXT:    # kill: def $r8d killed $r8d def $r8
+; CHECK-SSE1-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-SSE1-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-SSE1-NEXT:    andl %r9d, %esi
 ; CHECK-SSE1-NEXT:    andl %r8d, %edi
-; CHECK-SSE1-NEXT:    notl %eax
+; CHECK-SSE1-NEXT:    notl %r8d
 ; CHECK-SSE1-NEXT:    notl %r9d
 ; CHECK-SSE1-NEXT:    andl %ecx, %r9d
-; CHECK-SSE1-NEXT:    orl %esi, %r9d
-; CHECK-SSE1-NEXT:    andl %edx, %eax
-; CHECK-SSE1-NEXT:    orl %edi, %eax
+; CHECK-SSE1-NEXT:    leal (%r9,%rsi), %ecx
+; CHECK-SSE1-NEXT:    andl %edx, %r8d
+; CHECK-SSE1-NEXT:    leal (%r8,%rdi), %eax
 ; CHECK-SSE1-NEXT:    # kill: def $ax killed $ax killed $eax
-; CHECK-SSE1-NEXT:    movl %r9d, %edx
+; CHECK-SSE1-NEXT:    movl %ecx, %edx
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_v2i16:



More information about the llvm-commits mailing list