[llvm] [DAGCombiner] Freeze maybe-poison operands when folding select to logic (PR #84924)

Björn Pettersson via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 9 06:19:41 PDT 2024


https://github.com/bjope updated https://github.com/llvm/llvm-project/pull/84924

From d0d4003a34910bce5195894b65a634003e64696d Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Fri, 15 Mar 2024 14:20:26 +0100
Subject: [PATCH 1/3] Add test cases for SELECT->AND miscompiles in DAGCombiner

Add reproducers for GitHub issues #84653 and #85190.
---
 llvm/test/CodeGen/RISCV/pr84653_pr85190.ll | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/pr84653_pr85190.ll

diff --git a/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
new file mode 100644
index 00000000000000..3fa494e1a57ddc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=CHECK-NOZBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=CHECK-ZBB
+
+; This test case miscompiled for ZBB (DAGCombiner turned a SELECT into a more
+; poisonous AND operation).
+define i1 @pr84653(i32 %x) {
+; CHECK-NOZBB-LABEL: pr84653:
+; CHECK-NOZBB:       # %bb.0:
+; CHECK-NOZBB-NEXT:    sext.w a1, a0
+; CHECK-NOZBB-NEXT:    sgtz a2, a1
+; CHECK-NOZBB-NEXT:    lui a3, 524288
+; CHECK-NOZBB-NEXT:    addi a3, a3, -1
+; CHECK-NOZBB-NEXT:    xor a0, a0, a3
+; CHECK-NOZBB-NEXT:    sext.w a0, a0
+; CHECK-NOZBB-NEXT:    slt a0, a0, a1
+; CHECK-NOZBB-NEXT:    and a0, a2, a0
+; CHECK-NOZBB-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: pr84653:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    sext.w a1, a0
+; CHECK-ZBB-NEXT:    lui a2, 524288
+; CHECK-ZBB-NEXT:    addi a2, a2, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a2
+; CHECK-ZBB-NEXT:    sext.w a0, a0
+; CHECK-ZBB-NEXT:    max a0, a0, zero
+; CHECK-ZBB-NEXT:    slt a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
+  %cmp1 = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
+  %cmp2 = icmp sgt i32 %x, %sub
+  %r = select i1 %cmp1, i1 %cmp2, i1 false
+  ret i1 %r
+}
+
+; This test case miscompiled for ZBB (DAGCombiner turned a SELECT into a more
+; poisonous AND operation).
+define i1 @pr85190(i64 %a) {
+; CHECK-NOZBB-LABEL: pr85190:
+; CHECK-NOZBB:       # %bb.0:
+; CHECK-NOZBB-NEXT:    ori a1, a0, 7
+; CHECK-NOZBB-NEXT:    slti a2, a0, 0
+; CHECK-NOZBB-NEXT:    li a3, -1
+; CHECK-NOZBB-NEXT:    slli a3, a3, 63
+; CHECK-NOZBB-NEXT:    sub a3, a3, a1
+; CHECK-NOZBB-NEXT:    slt a0, a0, a3
+; CHECK-NOZBB-NEXT:    and a0, a2, a0
+; CHECK-NOZBB-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: pr85190:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    ori a1, a0, 7
+; CHECK-ZBB-NEXT:    li a2, -1
+; CHECK-ZBB-NEXT:    slli a2, a2, 63
+; CHECK-ZBB-NEXT:    sub a2, a2, a1
+; CHECK-ZBB-NEXT:    slt a0, a0, a2
+; CHECK-ZBB-NEXT:    ret
+  %or = or i64 %a, 7
+  %cmp1 = icmp slt i64 %a, 0
+  %sub = sub nsw i64 -9223372036854775808, %or  ; 0x8000000000000000
+  %cmp2 = icmp sgt i64 %sub, %a
+  %res = select i1 %cmp1, i1 %cmp2, i1 false
+  ret i1 %res
+}
+
+define i1 @select_to_or(i32 %x) {
+; CHECK-NOZBB-LABEL: select_to_or:
+; CHECK-NOZBB:       # %bb.0:
+; CHECK-NOZBB-NEXT:    sext.w a1, a0
+; CHECK-NOZBB-NEXT:    sgtz a2, a1
+; CHECK-NOZBB-NEXT:    lui a3, 524288
+; CHECK-NOZBB-NEXT:    addi a3, a3, -1
+; CHECK-NOZBB-NEXT:    xor a0, a0, a3
+; CHECK-NOZBB-NEXT:    sext.w a0, a0
+; CHECK-NOZBB-NEXT:    slt a0, a0, a1
+; CHECK-NOZBB-NEXT:    or a0, a2, a0
+; CHECK-NOZBB-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: select_to_or:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    sext.w a1, a0
+; CHECK-ZBB-NEXT:    lui a2, 524288
+; CHECK-ZBB-NEXT:    addi a2, a2, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a2
+; CHECK-ZBB-NEXT:    sext.w a0, a0
+; CHECK-ZBB-NEXT:    min a0, a0, zero
+; CHECK-ZBB-NEXT:    slt a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
+  %cmp1 = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
+  %cmp2 = icmp sgt i32 %x, %sub
+  %r = select i1 %cmp1, i1 true, i1 %cmp2
+  ret i1 %r
+}

From ae738636ce1234fe87614f1fa8b7d8ad4609622a Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Tue, 12 Mar 2024 15:24:33 +0100
Subject: [PATCH 2/3] [DAGCombiner] Freeze maybe-poison operands when folding
 select to logic

Just as for regular IR, we need to treat SELECT as conditionally
blocking poison: unless the condition itself is poison, the result
is only poison if the selected true/false value is poison. Thus,
when a DAG combine turns a SELECT into arithmetic/logical
operations (e.g. AND/OR) we need to make sure that the new
operations aren't more poisonous. One way to do that is to use
FREEZE to make sure the operands aren't poison.
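
To make this concrete, here is a minimal hand-written IR sketch of
why the naive fold is unsound (not taken from the patch; the value
names are made up):

  ; original:    %r = select i1 %c, i1 %x, i1 false
  ;              (if %c is false, %r is false even if %x is poison)
  ; naive fold:  %r = and i1 %c, %x
  ;              (%r is poison whenever %x is poison, regardless of %c)
  ; sound fold:  %x.fr = freeze i1 %x
  ;              %r = and i1 %c, %x.fr
  ;              (freeze stops poison from propagating through the AND)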

This patch aims to fix the kind of miscompiles reported in
  https://github.com/llvm/llvm-project/issues/84653
and
  https://github.com/llvm/llvm-project/issues/85190

The solution is to insert FREEZE, when needed to make the fold
sound, in the foldBoolSelectToLogic and
foldVSelectToSignBitSplatMask DAG combines.
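
In summary, the foldBoolSelectToLogic folds now become (the
maybe-poison operand is the one that gets frozen):

  select Cond, Cond, F --> or  Cond, freeze(F)
  select Cond, 1, F    --> or  Cond, freeze(F)
  select Cond, T, Cond --> and Cond, freeze(T)
  select Cond, T, 0    --> and Cond, freeze(T)
  select Cond, T, 1    --> or  (not Cond), freeze(T)
  select Cond, 0, F    --> and (not Cond), freeze(F)

and in foldVSelectToSignBitSplatMask the non-constant select
operand is frozen before it is ANDed/ORed with the sign-bit splat.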

This may result in some (hopefully minor) regressions, since we
lack some ways to fold away the freeze (or because
isGuaranteedNotToBePoison is too pessimistic). The focus of this
patch is just to avoid the miscompiles; some of the regressions
can likely be avoided by general improvements to poison/freeze
handling in SelectionDAG.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  18 +-
 llvm/test/CodeGen/AArch64/cmp-chains.ll       | 188 +++++++-----------
 ...rleaving-reductions-predicated-scalable.ll |   3 +-
 llvm/test/CodeGen/AArch64/fast-isel-select.ll |  20 +-
 .../AArch64/intrinsic-cttz-elts-sve.ll        |   3 +-
 .../CodeGen/AArch64/select-with-and-or.ll     |  26 ++-
 .../CodeGen/AArch64/sve-fp-int-min-max.ll     |   3 +-
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  64 +++---
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        | 134 ++++++-------
 .../AMDGPU/divergence-driven-trunc-to-i1.ll   |  54 ++---
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        |  60 +++---
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          |  64 +++---
 llvm/test/CodeGen/PowerPC/pr40922.ll          |   7 +-
 llvm/test/CodeGen/RISCV/pr84653_pr85190.ll    |  28 +--
 llvm/test/CodeGen/SystemZ/pr60413.ll          | 177 +++++++----------
 llvm/test/CodeGen/Thumb2/csel-andor-onebit.ll |   8 +-
 llvm/test/CodeGen/VE/Scalar/max.ll            |   2 +
 llvm/test/CodeGen/VE/Scalar/min.ll            |   2 +
 llvm/test/CodeGen/X86/avx512-logic.ll         |  12 +-
 llvm/test/CodeGen/X86/avx512-vec-cmp.ll       |   8 +-
 llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll     |   8 +-
 llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll   |  16 +-
 llvm/test/CodeGen/X86/avx512vl-logic.ll       |  24 +--
 llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll     |  56 +++---
 .../div-rem-pair-recomposition-unsigned.ll    |  51 +++--
 llvm/test/CodeGen/X86/fcmp-logic.ll           |  12 +-
 llvm/test/CodeGen/X86/fold-select.ll          |  10 +-
 llvm/test/CodeGen/X86/pr64589.ll              |   4 +-
 .../test/CodeGen/X86/vector-compare-all_of.ll |  41 ++--
 .../test/CodeGen/X86/vector-compare-any_of.ll |  27 ++-
 30 files changed, 543 insertions(+), 587 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8fe074666a3dc9..50046d380e2d80 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11343,28 +11343,28 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
     return SDValue();
 
-  // select Cond, Cond, F --> or Cond, F
-  // select Cond, 1, F    --> or Cond, F
+  // select Cond, Cond, F --> or Cond, freeze(F)
+  // select Cond, 1, F    --> or Cond, freeze(F)
   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
-    return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
+    return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, DAG.getFreeze(F));
 
   // select Cond, T, Cond --> and Cond, T
   // select Cond, T, 0    --> and Cond, T
   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
-    return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
+    return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, DAG.getFreeze(T));
 
   // select Cond, T, 1 --> or (not Cond), T
   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
     SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
                                       DAG.getAllOnesConstant(SDLoc(N), VT));
-    return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
+    return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, DAG.getFreeze(T));
   }
 
   // select Cond, 0, F --> and (not Cond), F
   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
     SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
                                       DAG.getAllOnesConstant(SDLoc(N), VT));
-    return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
+    return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, DAG.getFreeze(F));
   }
 
   return SDValue();
@@ -11398,7 +11398,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
     SDLoc DL(N);
     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
-    return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
   }
 
   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
@@ -11406,7 +11406,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
     SDLoc DL(N);
     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
-    return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
   }
 
   // If we have to invert the sign bit mask, only do that transform if the
@@ -11418,7 +11418,7 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
     SDValue Not = DAG.getNOT(DL, Sra, VT);
-    return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+    return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
   }
 
   // TODO: There's another pattern in this family, but it may require
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 1d9f39e5185939..8cb525f14cc813 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -6,21 +6,14 @@
 
 ; (x0 < x1) && (x2 > x3)
 define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; SDISEL-LABEL: cmp_and2:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w1
-; SDISEL-NEXT:    ccmp w2, w3, #0, lo
-; SDISEL-NEXT:    cset w0, hi
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_and2:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, hi
-; GISEL-NEXT:    and w0, w8, w9
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_and2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, hi
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ugt i32 %2, %3
   %7 = select i1 %5, i1 %6, i1 false
@@ -30,25 +23,17 @@ define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
 
 ; (x0 < x1) && (x2 > x3) && (x4 != x5)
 define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; SDISEL-LABEL: cmp_and3:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w1
-; SDISEL-NEXT:    ccmp w2, w3, #0, lo
-; SDISEL-NEXT:    ccmp w4, w5, #4, hi
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_and3:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, hi
-; GISEL-NEXT:    cmp w4, w5
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    and w0, w8, w9
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_and3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, hi
+; CHECK-NEXT:    cmp w4, w5
+; CHECK-NEXT:    and w8, w8, w9
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
   %9 = select i1 %7, i1 %8, i1 false
@@ -60,29 +45,20 @@ define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 
 ; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
 define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; SDISEL-LABEL: cmp_and4:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w2, w3
-; SDISEL-NEXT:    ccmp w0, w1, #2, hi
-; SDISEL-NEXT:    ccmp w4, w5, #4, lo
-; SDISEL-NEXT:    ccmp w6, w7, #0, ne
-; SDISEL-NEXT:    cset w0, eq
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_and4:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w8, hi
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w9, lo
-; GISEL-NEXT:    cmp w4, w5
-; GISEL-NEXT:    cset w10, ne
-; GISEL-NEXT:    cmp w6, w7
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    and w0, w8, w9
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_and4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w8, hi
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w9, lo
+; CHECK-NEXT:    cmp w4, w5
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    cmp w6, w7
+; CHECK-NEXT:    and w8, w8, w9
+; CHECK-NEXT:    cset w11, eq
+; CHECK-NEXT:    and w9, w10, w11
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
   %9 = icmp ugt i32 %2, %3
   %10 = icmp ult i32 %0, %1
   %11 = select i1 %9, i1 %10, i1 false
@@ -96,22 +72,15 @@ define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 
 ; (x0 < x1) || (x2 > x3)
 define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; SDISEL-LABEL: cmp_or2:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w1
-; SDISEL-NEXT:    ccmp w2, w3, #0, hs
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_or2:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_or2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ne i32 %2, %3
   %7 = select i1 %5, i1 true, i1 %6
@@ -121,26 +90,18 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
 
 ; (x0 < x1) || (x2 > x3) || (x4 != x5)
 define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; SDISEL-LABEL: cmp_or3:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w1
-; SDISEL-NEXT:    ccmp w2, w3, #2, hs
-; SDISEL-NEXT:    ccmp w4, w5, #0, ls
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_or3:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, hi
-; GISEL-NEXT:    cmp w4, w5
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_or3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, hi
+; CHECK-NEXT:    cmp w4, w5
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
   %9 = select i1 %7, i1 true, i1 %8
@@ -152,30 +113,21 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 
 ; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
 define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; SDISEL-LABEL: cmp_or4:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w1
-; SDISEL-NEXT:    ccmp w2, w3, #2, hs
-; SDISEL-NEXT:    ccmp w4, w5, #0, ls
-; SDISEL-NEXT:    ccmp w6, w7, #4, eq
-; SDISEL-NEXT:    cset w0, eq
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: cmp_or4:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, hi
-; GISEL-NEXT:    cmp w4, w5
-; GISEL-NEXT:    cset w10, ne
-; GISEL-NEXT:    cmp w6, w7
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: cmp_or4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, lo
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, hi
+; CHECK-NEXT:    cmp w4, w5
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    cmp w6, w7
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    cset w11, eq
+; CHECK-NEXT:    orr w9, w10, w11
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
   %9 = icmp ult i32 %0, %1
   %10 = icmp ugt i32 %2, %3
   %11 = select i1 %9, i1 true, i1 %10
@@ -242,5 +194,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
   %9 = zext i1 %8 to i32
   ret i32 %9
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index 467c3c254fc2d3..5ee9e89c34210b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -236,7 +236,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
 ; CHECK-NEXT:    mov z7.d, z0.d
 ; CHECK-NEXT:    add x9, x9, x11
 ; CHECK-NEXT:    add x8, x8, x12
-; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
+; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
+; CHECK-NEXT:    and p1.b, p1/z, p1.b, p2.b
 ; CHECK-NEXT:    zip2 p3.d, p1.d, p1.d
 ; CHECK-NEXT:    zip1 p2.d, p1.d, p1.d
 ; CHECK-NEXT:    whilelo p1.d, x9, x10
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index 6ad4a5ae572e0e..4ef4ee41e8aebe 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -295,22 +295,28 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
 ; Test peephole optimizations for select.
 define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
 ; CHECK-LABEL: select_opt1
-; CHECK:       orr {{w[0-9]+}}, w0, w1
+; SISEL:       orr [[REG:w[0-9]+]], w0, w1
+; SISEL:       and w0, [[REG]], #0x1
+; FISEL:       orr {{w[0-9]+}}, w0, w1
   %1 = select i1 %c, i1 true, i1 %a
   ret i1 %1
 }
 
 define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
 ; CHECK-LABEL: select_opt2
-; CHECK:       eor [[REG:w[0-9]+]], w0, #0x1
-; CHECK:       orr {{w[0-9]+}}, [[REG]], w1
+; SISEL:       orn [[REG:w[0-9]+]], w1, w0
+; SISEL:       and w0, [[REG]], #0x1
+; FISEL:       eor [[REG:w[0-9]+]], w0, #0x1
+; FISEL:       orr {{w[0-9]+}}, [[REG]], w1
   %1 = select i1 %c, i1 %a, i1 true
   ret i1 %1
 }
 
 define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
 ; CHECK-LABEL: select_opt3
-; CHECK:       bic {{w[0-9]+}}, w1, w0
+; SISEL:       eor [[REG:w[0-9]+]], w0, #0x1
+; SISEL:       and w0, [[REG]], w1
+; FISEL:       bic {{w[0-9]+}}, w1, w0
   %1 = select i1 %c, i1 false, i1 %a
   ret i1 %1
 }
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 1a4ab6ab334a64..fadb0a3379e4e8 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -213,7 +213,8 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
 ; CHECK-LABEL: ctz_and_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    cmpne p2.b, p1/z, z0.b, z1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index 84b6818eaa739c..1fdb9d34bf1024 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -5,8 +5,10 @@ define i1 @and(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: and:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    ccmp w2, w3, #4, eq
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, gt
+; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -18,8 +20,11 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: or:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    ccmp w2, w3, #0, ne
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, gt
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -31,8 +36,10 @@ define i1 @and_not(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: and_not:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    ccmp w2, w3, #4, ne
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, gt
+; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -44,8 +51,11 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: or_not:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    ccmp w2, w3, #0, eq
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cmp w2, w3
+; CHECK-NEXT:    cset w9, gt
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
index 5ff9f0f0df62f8..1d7ec72ccf27bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -24,7 +24,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z4.s
 ; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, z3.s
 ; CHECK-NEXT:    add z0.d, z2.d, z1.d
-; CHECK-NEXT:    bic p2.b, p1/z, p1.b, p2.b
+; CHECK-NEXT:    not p2.b, p0/z, p2.b
+; CHECK-NEXT:    and p2.b, p1/z, p1.b, p2.b
 ; CHECK-NEXT:    mov z0.d, p2/m, z2.d
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z2.d
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index cf99b5d80e13a8..1cb8304ae6b4a7 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -466,28 +466,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
 ; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
@@ -498,7 +491,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1032,10 +1024,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -2737,28 +2729,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
 ; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
@@ -2769,7 +2754,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3303,10 +3287,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 16a03badcb1329..efb89499b29f0a 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -16,103 +16,103 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_xor_b32_e32 v18, v24, v2
 ; SDAG-NEXT:    v_xor_b32_e32 v1, v24, v1
 ; SDAG-NEXT:    v_xor_b32_e32 v0, v24, v0
-; SDAG-NEXT:    v_xor_b32_e32 v19, v25, v11
-; SDAG-NEXT:    v_xor_b32_e32 v20, v25, v10
-; SDAG-NEXT:    v_xor_b32_e32 v9, v25, v9
-; SDAG-NEXT:    v_xor_b32_e32 v8, v25, v8
+; SDAG-NEXT:    v_xor_b32_e32 v11, v25, v11
+; SDAG-NEXT:    v_xor_b32_e32 v10, v25, v10
+; SDAG-NEXT:    v_xor_b32_e32 v19, v25, v9
+; SDAG-NEXT:    v_xor_b32_e32 v20, v25, v8
 ; SDAG-NEXT:    v_sub_i32_e32 v2, vcc, v0, v24
 ; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v1, v24, vcc
 ; SDAG-NEXT:    v_ffbh_u32_e32 v0, v2
-; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, v18, v24, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v18, v24, vcc
 ; SDAG-NEXT:    v_add_i32_e64 v1, s[4:5], 32, v0
 ; SDAG-NEXT:    v_ffbh_u32_e32 v18, v3
-; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v17, v24, vcc
-; SDAG-NEXT:    v_or_b32_e32 v0, v2, v10
-; SDAG-NEXT:    v_ffbh_u32_e32 v17, v10
+; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, v17, v24, vcc
+; SDAG-NEXT:    v_or_b32_e32 v0, v2, v8
+; SDAG-NEXT:    v_ffbh_u32_e32 v17, v8
 ; SDAG-NEXT:    v_min_u32_e32 v18, v1, v18
-; SDAG-NEXT:    v_sub_i32_e32 v28, vcc, v8, v25
-; SDAG-NEXT:    v_or_b32_e32 v1, v3, v11
-; SDAG-NEXT:    v_add_i32_e64 v8, s[4:5], 32, v17
-; SDAG-NEXT:    v_ffbh_u32_e32 v17, v11
+; SDAG-NEXT:    v_sub_i32_e32 v28, vcc, v20, v25
+; SDAG-NEXT:    v_or_b32_e32 v1, v3, v9
+; SDAG-NEXT:    v_add_i32_e64 v17, s[4:5], 32, v17
+; SDAG-NEXT:    v_ffbh_u32_e32 v20, v9
 ; SDAG-NEXT:    v_add_i32_e64 v18, s[4:5], 64, v18
 ; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_subb_u32_e32 v29, vcc, v9, v25, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v29, vcc, v19, v25, vcc
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; SDAG-NEXT:    v_ffbh_u32_e32 v1, v28
-; SDAG-NEXT:    v_min_u32_e32 v8, v8, v17
-; SDAG-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[10:11]
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, s[6:7]
-; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, v20, v25, vcc
-; SDAG-NEXT:    v_add_i32_e64 v9, s[8:9], 32, v1
-; SDAG-NEXT:    v_ffbh_u32_e32 v20, v29
-; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, v8, s[6:7]
-; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v19, v25, vcc
-; SDAG-NEXT:    v_or_b32_e32 v8, v28, v0
-; SDAG-NEXT:    v_ffbh_u32_e32 v19, v0
-; SDAG-NEXT:    v_min_u32_e32 v20, v9, v20
-; SDAG-NEXT:    v_or_b32_e32 v9, v29, v1
-; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 32, v19
+; SDAG-NEXT:    v_min_u32_e32 v17, v17, v20
+; SDAG-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[8:9]
+; SDAG-NEXT:    v_cndmask_b32_e64 v19, v21, 0, s[6:7]
+; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, v10, v25, vcc
+; SDAG-NEXT:    v_add_i32_e64 v20, s[8:9], 32, v1
+; SDAG-NEXT:    v_ffbh_u32_e32 v21, v29
+; SDAG-NEXT:    v_cndmask_b32_e64 v17, v18, v17, s[6:7]
+; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v11, v25, vcc
+; SDAG-NEXT:    v_or_b32_e32 v10, v28, v0
+; SDAG-NEXT:    v_ffbh_u32_e32 v18, v0
+; SDAG-NEXT:    v_min_u32_e32 v20, v20, v21
+; SDAG-NEXT:    v_or_b32_e32 v11, v29, v1
+; SDAG-NEXT:    v_add_i32_e32 v18, vcc, 32, v18
 ; SDAG-NEXT:    v_ffbh_u32_e32 v21, v1
 ; SDAG-NEXT:    v_add_i32_e32 v20, vcc, 64, v20
 ; SDAG-NEXT:    v_addc_u32_e64 v22, s[6:7], 0, 0, vcc
-; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT:    v_min_u32_e32 v8, v19, v21
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT:    v_min_u32_e32 v10, v18, v21
 ; SDAG-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, v22, 0, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, v22, 0, s[6:7]
 ; SDAG-NEXT:    s_or_b64 s[8:9], vcc, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v20, v8, s[6:7]
-; SDAG-NEXT:    v_sub_i32_e32 v8, vcc, v8, v18
-; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, v9, v17, vcc
-; SDAG-NEXT:    v_xor_b32_e32 v17, 0x7f, v8
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, v20, v10, s[6:7]
+; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, v10, v17
+; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v11, v19, vcc
+; SDAG-NEXT:    v_xor_b32_e32 v17, 0x7f, v10
 ; SDAG-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v16, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[8:9]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subbrev_u32_e32 v19, vcc, 0, v16, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v16, v17, v18
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; SDAG-NEXT:    v_or_b32_e32 v17, v9, v19
+; SDAG-NEXT:    v_or_b32_e32 v17, v11, v19
 ; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v20, v21, v20, vcc
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
 ; SDAG-NEXT:    v_and_b32_e32 v16, 1, v20
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v16
 ; SDAG-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v20, v11, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v20, v9, 0, s[4:5]
 ; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v10, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v17, v8, 0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, v3, 0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v16, v2, 0, s[4:5]
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_6
 ; SDAG-NEXT:  ; %bb.1: ; %udiv-bb15
-; SDAG-NEXT:    v_add_i32_e32 v30, vcc, 1, v8
-; SDAG-NEXT:    v_sub_i32_e64 v20, s[4:5], 63, v8
+; SDAG-NEXT:    v_add_i32_e32 v30, vcc, 1, v10
+; SDAG-NEXT:    v_sub_i32_e64 v20, s[4:5], 63, v10
 ; SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v17, 0
-; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, 0, v9, vcc
+; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, 0, v11, vcc
 ; SDAG-NEXT:    v_lshl_b64 v[20:21], v[2:3], v20
 ; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, 0, v18, vcc
 ; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, 0, v19, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v18, v30, v32
-; SDAG-NEXT:    v_sub_i32_e32 v34, vcc, 0x7f, v8
+; SDAG-NEXT:    v_sub_i32_e32 v34, vcc, 0x7f, v10
 ; SDAG-NEXT:    v_or_b32_e32 v19, v31, v33
-; SDAG-NEXT:    v_lshl_b64 v[8:9], v[10:11], v34
+; SDAG-NEXT:    v_lshl_b64 v[10:11], v[8:9], v34
 ; SDAG-NEXT:    v_sub_i32_e32 v35, vcc, 64, v34
 ; SDAG-NEXT:    v_lshl_b64 v[22:23], v[2:3], v34
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
 ; SDAG-NEXT:    v_lshr_b64 v[18:19], v[2:3], v35
-; SDAG-NEXT:    v_or_b32_e32 v9, v9, v19
-; SDAG-NEXT:    v_or_b32_e32 v8, v8, v18
+; SDAG-NEXT:    v_or_b32_e32 v11, v11, v19
+; SDAG-NEXT:    v_or_b32_e32 v10, v10, v18
 ; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, v21, v9, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v20, v8, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, v21, v11, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, v20, v10, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v23, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, v22, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v34
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, v11, v9, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, v10, v8, s[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v18, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v19, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -122,24 +122,24 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[16:17], v[2:3], v30
 ; SDAG-NEXT:    v_sub_i32_e32 v35, vcc, 64, v30
 ; SDAG-NEXT:    v_subrev_i32_e32 v36, vcc, 64, v30
-; SDAG-NEXT:    v_lshr_b64 v[37:38], v[10:11], v30
+; SDAG-NEXT:    v_lshr_b64 v[37:38], v[8:9], v30
 ; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v28
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
 ; SDAG-NEXT:    v_mov_b32_e32 v22, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v23, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v18, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v19, 0
-; SDAG-NEXT:    v_lshl_b64 v[48:49], v[10:11], v35
-; SDAG-NEXT:    v_lshr_b64 v[10:11], v[10:11], v36
+; SDAG-NEXT:    v_lshl_b64 v[48:49], v[8:9], v35
+; SDAG-NEXT:    v_lshr_b64 v[8:9], v[8:9], v36
 ; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v29, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v17, v17, v49
 ; SDAG-NEXT:    v_or_b32_e32 v16, v16, v48
 ; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v0, vcc
 ; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v11, v17, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v16, v10, v16, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v38, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v37, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v17, v9, v17, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v16, v8, v16, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, v38, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v37, s[4:5]
 ; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v1, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
 ; SDAG-NEXT:    v_cndmask_b32_e32 v3, v17, v3, vcc
@@ -147,22 +147,22 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_mov_b32_e32 v17, 0
 ; SDAG-NEXT:  .LBB0_3: ; %udiv-do-while3
 ; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
-; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
+; SDAG-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v16, 31, v3
 ; SDAG-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
-; SDAG-NEXT:    v_lshrrev_b32_e32 v38, 31, v9
-; SDAG-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
+; SDAG-NEXT:    v_lshrrev_b32_e32 v38, 31, v11
+; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v39, 31, v21
 ; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
-; SDAG-NEXT:    v_or_b32_e32 v10, v10, v16
+; SDAG-NEXT:    v_or_b32_e32 v8, v8, v16
 ; SDAG-NEXT:    v_or_b32_e32 v2, v2, v38
-; SDAG-NEXT:    v_or_b32_e32 v8, v8, v39
-; SDAG-NEXT:    v_or_b32_e32 v9, v19, v9
+; SDAG-NEXT:    v_or_b32_e32 v10, v10, v39
+; SDAG-NEXT:    v_or_b32_e32 v11, v19, v11
 ; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, v34, v2
-; SDAG-NEXT:    v_or_b32_e32 v8, v18, v8
+; SDAG-NEXT:    v_or_b32_e32 v10, v18, v10
 ; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v35, v3, vcc
-; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v36, v10, vcc
-; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v37, v11, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v36, v8, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v37, v9, vcc
 ; SDAG-NEXT:    v_ashrrev_i32_e32 v38, 31, v16
 ; SDAG-NEXT:    v_and_b32_e32 v39, v38, v28
 ; SDAG-NEXT:    v_and_b32_e32 v48, v38, v29
@@ -171,8 +171,8 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_and_b32_e32 v38, v38, v1
 ; SDAG-NEXT:    v_sub_i32_e32 v2, vcc, v2, v39
 ; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v3, v48, vcc
-; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, v10, v49, vcc
-; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v11, v38, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v8, v49, vcc
+; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, v9, v38, vcc
 ; SDAG-NEXT:    v_add_i32_e32 v30, vcc, -1, v30
 ; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
 ; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
@@ -191,7 +191,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; SDAG-NEXT:  .LBB0_5: ; %Flow14
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    v_lshl_b64 v[0:1], v[8:9], 1
+; SDAG-NEXT:    v_lshl_b64 v[0:1], v[10:11], 1
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 31, v21
 ; SDAG-NEXT:    v_lshl_b64 v[2:3], v[20:21], 1
 ; SDAG-NEXT:    v_or_b32_e32 v0, v0, v8
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
index c3a6cd5975a779..974a18084a357c 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
@@ -15,13 +15,16 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x
   ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
   ; GCN-NEXT:   [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]]
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
-  ; GCN-NEXT:   S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN-NEXT:   S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc
+  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GCN-NEXT:   [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY3]], implicit-def dead $scc
+  ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc
+  ; GCN-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY5]], killed [[COPY4]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -38,7 +41,8 @@ define i1 @divergent_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x, i1 %z) {
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[COPY2]], implicit $exec
   ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
   ; GCN-NEXT:   [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY1]], 0, 16, implicit $exec
   ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@@ -65,15 +69,16 @@ define amdgpu_kernel void @uniform_trunc_i32_to_i1(ptr addrspace(1) %out, i32 %x
   ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
   ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY4]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY killed [[S_LOAD_DWORDX2_IMM1]]
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY5]], implicit-def dead $scc
   ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN-NEXT:   S_CMP_LT_I32 killed [[COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY6]], killed [[COPY5]], implicit-def dead $scc
+  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[COPY4]], killed [[S_MOV_B32_2]], implicit-def $scc
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY7]], killed [[COPY6]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -90,7 +95,8 @@ define i1 @divergent_trunc_i32_to_i1(ptr addrspace(1) %out, i32 %x, i1 %z) {
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[COPY2]], implicit $exec
   ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
   ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; GCN-NEXT:   [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec
@@ -122,13 +128,14 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
   ; GCN-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY7]], implicit-def dead $scc
   ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY $scc
   ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
-  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
+  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY8]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.2, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -147,11 +154,12 @@ define i1 @divergent_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x, i1 %z) {
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
   ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+  ; GCN-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[COPY3]], implicit $exec
   ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
   ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
-  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
+  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY4]], implicit $exec
   ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 99818df6175bdf..667a3f398c08a2 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -22,13 +22,13 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v7, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -394,13 +394,13 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v7, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -765,13 +765,13 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1123,13 +1123,13 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1509,13 +1509,13 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1860,13 +1860,13 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
 ; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
 ; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
+; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s5, -1
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1]
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], -1, v[2:3]
+; SDAG-NEXT:    s_mov_b32 s7, -1
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
+; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index 6ba66ccf71868e..138db876056712 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -501,28 +501,21 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
 ; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
@@ -533,7 +526,6 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1067,10 +1059,10 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -1899,28 +1891,21 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
 ; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
@@ -1931,7 +1916,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -2465,10 +2449,10 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/pr40922.ll b/llvm/test/CodeGen/PowerPC/pr40922.ll
index 9252e9a3e3aa4f..2d9add6a198579 100644
--- a/llvm/test/CodeGen/PowerPC/pr40922.ll
+++ b/llvm/test/CodeGen/PowerPC/pr40922.ll
@@ -23,11 +23,12 @@ define i32 @a() {
 ; CHECK-NEXT:    li 5, 0
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    addic 6, 4, 6
-; CHECK-NEXT:    addze 5, 5
 ; CHECK-NEXT:    rlwinm 6, 6, 0, 28, 26
-; CHECK-NEXT:    andi. 5, 5, 1
+; CHECK-NEXT:    addze 5, 5
 ; CHECK-NEXT:    cmplw 1, 6, 4
-; CHECK-NEXT:    crorc 20, 1, 4
+; CHECK-NEXT:    andi. 5, 5, 1
+; CHECK-NEXT:    crnot 20, 4
+; CHECK-NEXT:    cror 20, 1, 20
 ; CHECK-NEXT:    bc 12, 20, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl e
diff --git a/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
index 3fa494e1a57ddc..a80379eab6100f 100644
--- a/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
+++ b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
@@ -20,12 +20,13 @@ define i1 @pr84653(i32 %x) {
 ; CHECK-ZBB-LABEL: pr84653:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    sext.w a1, a0
-; CHECK-ZBB-NEXT:    lui a2, 524288
-; CHECK-ZBB-NEXT:    addi a2, a2, -1
-; CHECK-ZBB-NEXT:    xor a0, a0, a2
+; CHECK-ZBB-NEXT:    sgtz a2, a1
+; CHECK-ZBB-NEXT:    lui a3, 524288
+; CHECK-ZBB-NEXT:    addi a3, a3, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a3
 ; CHECK-ZBB-NEXT:    sext.w a0, a0
-; CHECK-ZBB-NEXT:    max a0, a0, zero
 ; CHECK-ZBB-NEXT:    slt a0, a0, a1
+; CHECK-ZBB-NEXT:    and a0, a2, a0
 ; CHECK-ZBB-NEXT:    ret
   %cmp1 = icmp sgt i32 %x, 0
   %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
@@ -51,10 +52,12 @@ define i1 @pr85190(i64 %a) {
 ; CHECK-ZBB-LABEL: pr85190:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    ori a1, a0, 7
-; CHECK-ZBB-NEXT:    li a2, -1
-; CHECK-ZBB-NEXT:    slli a2, a2, 63
-; CHECK-ZBB-NEXT:    sub a2, a2, a1
-; CHECK-ZBB-NEXT:    slt a0, a0, a2
+; CHECK-ZBB-NEXT:    slti a2, a0, 0
+; CHECK-ZBB-NEXT:    li a3, -1
+; CHECK-ZBB-NEXT:    slli a3, a3, 63
+; CHECK-ZBB-NEXT:    sub a3, a3, a1
+; CHECK-ZBB-NEXT:    slt a0, a0, a3
+; CHECK-ZBB-NEXT:    and a0, a2, a0
 ; CHECK-ZBB-NEXT:    ret
   %or = or i64 %a, 7
   %cmp1 = icmp slt i64 %a, 0
@@ -80,12 +83,13 @@ define i1 @select_to_or(i32 %x) {
 ; CHECK-ZBB-LABEL: select_to_or:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    sext.w a1, a0
-; CHECK-ZBB-NEXT:    lui a2, 524288
-; CHECK-ZBB-NEXT:    addi a2, a2, -1
-; CHECK-ZBB-NEXT:    xor a0, a0, a2
+; CHECK-ZBB-NEXT:    sgtz a2, a1
+; CHECK-ZBB-NEXT:    lui a3, 524288
+; CHECK-ZBB-NEXT:    addi a3, a3, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a3
 ; CHECK-ZBB-NEXT:    sext.w a0, a0
-; CHECK-ZBB-NEXT:    min a0, a0, zero
 ; CHECK-ZBB-NEXT:    slt a0, a0, a1
+; CHECK-ZBB-NEXT:    or a0, a2, a0
 ; CHECK-ZBB-NEXT:    ret
   %cmp1 = icmp sgt i32 %x, 0
   %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
diff --git a/llvm/test/CodeGen/SystemZ/pr60413.ll b/llvm/test/CodeGen/SystemZ/pr60413.ll
index 5a629567d07069..aae254359f334d 100644
--- a/llvm/test/CodeGen/SystemZ/pr60413.ll
+++ b/llvm/test/CodeGen/SystemZ/pr60413.ll
@@ -13,114 +13,91 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
 define dso_local void @m() local_unnamed_addr #1 {
 ; CHECK-LABEL: m:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    aghi %r15, -168
-; CHECK-NEXT:    llhrl %r2, f+4
-; CHECK-NEXT:    sll %r2, 8
-; CHECK-NEXT:    larl %r1, f
-; CHECK-NEXT:    ic %r2, 6(%r1)
-; CHECK-NEXT:    larl %r1, e
-; CHECK-NEXT:    lb %r0, 3(%r1)
-; CHECK-NEXT:    clfi %r2, 128
+; CHECK-NEXT:    lhrl %r1, f+4
+; CHECK-NEXT:    sll %r1, 8
+; CHECK-NEXT:    larl %r2, f
+; CHECK-NEXT:    ic %r1, 6(%r2)
+; CHECK-NEXT:    larl %r2, e
+; CHECK-NEXT:    lb %r0, 3(%r2)
+; CHECK-NEXT:    vlvgp %v0, %r1, %r1
+; CHECK-NEXT:    nilh %r1, 255
+; CHECK-NEXT:    chi %r1, 128
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    risbg %r1, %r1, 63, 191, 36
-; CHECK-NEXT:    vlvgp %v1, %r2, %r0
-; CHECK-NEXT:    vlvgf %v1, %r2, 0
-; CHECK-NEXT:    vlvgf %v1, %r2, 2
-; CHECK-NEXT:    vlvgp %v0, %r0, %r2
-; CHECK-NEXT:    vlvgp %v2, %r2, %r2
-; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
-; CHECK-NEXT:    nilh %r2, 255
-; CHECK-NEXT:    chi %r2, 128
-; CHECK-NEXT:    ipm %r2
-; CHECK-NEXT:    risbg %r2, %r2, 63, 191, 36
-; CHECK-NEXT:    vlvgf %v0, %r0, 0
-; CHECK-NEXT:    vlvgf %v0, %r0, 2
-; CHECK-NEXT:    vrepf %v2, %v2, 1
-; CHECK-NEXT:    vgbm %v3, 30583
-; CHECK-NEXT:    vn %v0, %v0, %v3
-; CHECK-NEXT:    vn %v1, %v1, %v3
-; CHECK-NEXT:    vn %v2, %v2, %v3
-; CHECK-NEXT:    vrepif %v3, 127
-; CHECK-NEXT:    vchlf %v1, %v1, %v3
-; CHECK-NEXT:    vlgvf %r12, %v1, 0
-; CHECK-NEXT:    vchlf %v2, %v2, %v3
-; CHECK-NEXT:    vlgvf %r4, %v2, 1
-; CHECK-NEXT:    nilf %r4, 1
-; CHECK-NEXT:    vlgvf %r5, %v2, 0
-; CHECK-NEXT:    risbg %r3, %r5, 48, 176, 15
-; CHECK-NEXT:    rosbg %r3, %r4, 32, 49, 14
-; CHECK-NEXT:    vlgvf %r14, %v2, 2
-; CHECK-NEXT:    nilf %r14, 1
-; CHECK-NEXT:    rosbg %r3, %r14, 32, 50, 13
-; CHECK-NEXT:    vlgvf %r13, %v2, 3
-; CHECK-NEXT:    nilf %r13, 1
-; CHECK-NEXT:    rosbg %r3, %r13, 32, 51, 12
-; CHECK-NEXT:    rosbg %r3, %r12, 52, 52, 11
-; CHECK-NEXT:    vlgvf %r12, %v1, 1
-; CHECK-NEXT:    rosbg %r3, %r12, 53, 53, 10
-; CHECK-NEXT:    vlgvf %r12, %v1, 2
-; CHECK-NEXT:    rosbg %r3, %r12, 54, 54, 9
-; CHECK-NEXT:    vlgvf %r12, %v1, 3
-; CHECK-NEXT:    rosbg %r3, %r12, 55, 55, 8
-; CHECK-NEXT:    vchlf %v0, %v0, %v3
-; CHECK-NEXT:    vlgvf %r12, %v0, 0
-; CHECK-NEXT:    rosbg %r3, %r12, 56, 56, 7
-; CHECK-NEXT:    vlgvf %r12, %v0, 1
-; CHECK-NEXT:    rosbg %r3, %r12, 57, 57, 6
-; CHECK-NEXT:    vlgvf %r12, %v0, 2
-; CHECK-NEXT:    rosbg %r3, %r12, 58, 58, 5
-; CHECK-NEXT:    vlgvf %r12, %v0, 3
-; CHECK-NEXT:    rosbg %r3, %r12, 59, 59, 4
+; CHECK-NEXT:    vrepf %v0, %v0, 1
+; CHECK-NEXT:    vgbm %v1, 30583
+; CHECK-NEXT:    vn %v0, %v0, %v1
+; CHECK-NEXT:    vrepif %v1, 127
+; CHECK-NEXT:    vchlf %v0, %v0, %v1
+; CHECK-NEXT:    vlgvf %r3, %v0, 1
+; CHECK-NEXT:    nilf %r3, 1
+; CHECK-NEXT:    vlgvf %r4, %v0, 0
+; CHECK-NEXT:    risbg %r2, %r4, 48, 176, 15
+; CHECK-NEXT:    rosbg %r2, %r3, 32, 49, 14
+; CHECK-NEXT:    vlgvf %r5, %v0, 2
 ; CHECK-NEXT:    nilf %r5, 1
-; CHECK-NEXT:    rosbg %r3, %r5, 32, 60, 3
-; CHECK-NEXT:    rosbg %r3, %r4, 32, 61, 2
-; CHECK-NEXT:    rosbg %r3, %r14, 32, 62, 1
-; CHECK-NEXT:    or %r3, %r13
-; CHECK-NEXT:    vlgvb %r5, %v0, 1
-; CHECK-NEXT:    vlgvb %r4, %v0, 0
-; CHECK-NEXT:    risbg %r4, %r4, 48, 176, 15
-; CHECK-NEXT:    rosbg %r4, %r5, 49, 49, 14
-; CHECK-NEXT:    vlgvb %r5, %v0, 2
-; CHECK-NEXT:    rosbg %r4, %r5, 50, 50, 13
-; CHECK-NEXT:    vlgvb %r5, %v0, 3
-; CHECK-NEXT:    rosbg %r4, %r5, 51, 51, 12
-; CHECK-NEXT:    vlgvb %r5, %v0, 4
-; CHECK-NEXT:    rosbg %r4, %r5, 52, 52, 11
-; CHECK-NEXT:    vlgvb %r5, %v0, 5
-; CHECK-NEXT:    rosbg %r4, %r5, 53, 53, 10
-; CHECK-NEXT:    vlgvb %r5, %v0, 6
-; CHECK-NEXT:    rosbg %r4, %r5, 54, 54, 9
-; CHECK-NEXT:    vlgvb %r5, %v0, 7
-; CHECK-NEXT:    rosbg %r4, %r5, 55, 55, 8
-; CHECK-NEXT:    vlgvb %r5, %v0, 8
-; CHECK-NEXT:    rosbg %r4, %r5, 56, 56, 7
-; CHECK-NEXT:    vlgvb %r5, %v0, 9
-; CHECK-NEXT:    rosbg %r4, %r5, 57, 57, 6
-; CHECK-NEXT:    vlgvb %r5, %v0, 10
-; CHECK-NEXT:    rosbg %r4, %r5, 58, 58, 5
-; CHECK-NEXT:    vlgvb %r5, %v0, 11
-; CHECK-NEXT:    rosbg %r4, %r5, 59, 59, 4
-; CHECK-NEXT:    vlgvb %r5, %v0, 12
-; CHECK-NEXT:    rosbg %r4, %r5, 60, 60, 3
-; CHECK-NEXT:    vlgvb %r5, %v0, 13
-; CHECK-NEXT:    rosbg %r4, %r5, 61, 61, 2
-; CHECK-NEXT:    vlgvb %r5, %v0, 14
-; CHECK-NEXT:    rosbg %r4, %r5, 62, 62, 1
-; CHECK-NEXT:    vlgvb %r5, %v0, 15
-; CHECK-NEXT:    rosbg %r4, %r5, 63, 63, 0
-; CHECK-NEXT:    xilf %r4, 4294967295
-; CHECK-NEXT:    or %r4, %r3
-; CHECK-NEXT:    tmll %r4, 65535
-; CHECK-NEXT:    ipm %r3
-; CHECK-NEXT:    afi %r3, -268435456
-; CHECK-NEXT:    srl %r3, 31
+; CHECK-NEXT:    rosbg %r2, %r5, 32, 50, 13
+; CHECK-NEXT:    vlgvf %r14, %v0, 3
+; CHECK-NEXT:    nilf %r14, 1
+; CHECK-NEXT:    rosbg %r2, %r14, 32, 51, 12
+; CHECK-NEXT:    nilf %r4, 1
+; CHECK-NEXT:    rosbg %r2, %r4, 32, 52, 11
+; CHECK-NEXT:    rosbg %r2, %r3, 32, 53, 10
+; CHECK-NEXT:    rosbg %r2, %r5, 32, 54, 9
+; CHECK-NEXT:    rosbg %r2, %r14, 32, 55, 8
+; CHECK-NEXT:    rosbg %r2, %r4, 32, 56, 7
+; CHECK-NEXT:    rosbg %r2, %r3, 32, 57, 6
+; CHECK-NEXT:    rosbg %r2, %r5, 32, 58, 5
+; CHECK-NEXT:    rosbg %r2, %r14, 32, 59, 4
+; CHECK-NEXT:    rosbg %r2, %r4, 32, 60, 3
+; CHECK-NEXT:    rosbg %r2, %r3, 32, 61, 2
+; CHECK-NEXT:    rosbg %r2, %r5, 32, 62, 1
+; CHECK-NEXT:    or %r2, %r14
+; CHECK-NEXT:    vlgvb %r4, %v0, 1
+; CHECK-NEXT:    vlgvb %r3, %v0, 0
+; CHECK-NEXT:    risbg %r3, %r3, 48, 176, 15
+; CHECK-NEXT:    rosbg %r3, %r4, 49, 49, 14
+; CHECK-NEXT:    vlgvb %r4, %v0, 2
+; CHECK-NEXT:    rosbg %r3, %r4, 50, 50, 13
+; CHECK-NEXT:    vlgvb %r4, %v0, 3
+; CHECK-NEXT:    rosbg %r3, %r4, 51, 51, 12
+; CHECK-NEXT:    vlgvb %r4, %v0, 4
+; CHECK-NEXT:    rosbg %r3, %r4, 52, 52, 11
+; CHECK-NEXT:    vlgvb %r4, %v0, 5
+; CHECK-NEXT:    rosbg %r3, %r4, 53, 53, 10
+; CHECK-NEXT:    vlgvb %r4, %v0, 6
+; CHECK-NEXT:    rosbg %r3, %r4, 54, 54, 9
+; CHECK-NEXT:    vlgvb %r4, %v0, 7
+; CHECK-NEXT:    rosbg %r3, %r4, 55, 55, 8
+; CHECK-NEXT:    vlgvb %r4, %v0, 8
+; CHECK-NEXT:    rosbg %r3, %r4, 56, 56, 7
+; CHECK-NEXT:    vlgvb %r4, %v0, 9
+; CHECK-NEXT:    rosbg %r3, %r4, 57, 57, 6
+; CHECK-NEXT:    vlgvb %r4, %v0, 10
+; CHECK-NEXT:    rosbg %r3, %r4, 58, 58, 5
+; CHECK-NEXT:    vlgvb %r4, %v0, 11
+; CHECK-NEXT:    rosbg %r3, %r4, 59, 59, 4
+; CHECK-NEXT:    vlgvb %r4, %v0, 12
+; CHECK-NEXT:    rosbg %r3, %r4, 60, 60, 3
+; CHECK-NEXT:    vlgvb %r4, %v0, 13
+; CHECK-NEXT:    rosbg %r3, %r4, 61, 61, 2
+; CHECK-NEXT:    vlgvb %r4, %v0, 14
+; CHECK-NEXT:    rosbg %r3, %r4, 62, 62, 1
+; CHECK-NEXT:    vlgvb %r4, %v0, 15
+; CHECK-NEXT:    rosbg %r3, %r4, 63, 63, 0
+; CHECK-NEXT:    xilf %r3, 4294967295
+; CHECK-NEXT:    or %r3, %r2
+; CHECK-NEXT:    tmll %r3, 65535
+; CHECK-NEXT:    ipm %r2
+; CHECK-NEXT:    afi %r2, -268435456
+; CHECK-NEXT:    srl %r2, 31
 ; CHECK-NEXT:    nr %r2, %r1
-; CHECK-NEXT:    nr %r2, %r3
 ; CHECK-NEXT:    nr %r2, %r0
 ; CHECK-NEXT:    larl %r1, g
 ; CHECK-NEXT:    stc %r2, 0(%r1)
-; CHECK-NEXT:    lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT:    lmg %r14, %r15, 280(%r15)
 ; CHECK-NEXT:    br %r14
 entry:
   %n = alloca i32, align 4
diff --git a/llvm/test/CodeGen/Thumb2/csel-andor-onebit.ll b/llvm/test/CodeGen/Thumb2/csel-andor-onebit.ll
index 5219c74e35817c..2d13c244166aa1 100644
--- a/llvm/test/CodeGen/Thumb2/csel-andor-onebit.ll
+++ b/llvm/test/CodeGen/Thumb2/csel-andor-onebit.ll
@@ -179,10 +179,10 @@ define i32 @t5(i32 %f.0, i32 %call) {
 ; CHECK-NEXT:    cset r1, ne
 ; CHECK-NEXT:    cmp r0, #13
 ; CHECK-NEXT:    cset r0, eq
-; CHECK-NEXT:    and.w r2, r0, r1
-; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    eor r0, r0, #1
-; CHECK-NEXT:    orrs r0, r2
+; CHECK-NEXT:    orr.w r2, r0, r1
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    orn r0, r0, r2
+; CHECK-NEXT:    and r0, r0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %tobool1.i = icmp ne i32 %call, 0
diff --git a/llvm/test/CodeGen/VE/Scalar/max.ll b/llvm/test/CodeGen/VE/Scalar/max.ll
index 12aa101cb48c4d..51da557c6c49fb 100644
--- a/llvm/test/CodeGen/VE/Scalar/max.ll
+++ b/llvm/test/CodeGen/VE/Scalar/max.ll
@@ -281,11 +281,13 @@ define zeroext i1 @maxi1(i1 zeroext, i1 zeroext) {
 ; CHECK-LABEL: maxi1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    and %s0, 1, %s0
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
 ; OPT-LABEL: maxi1:
 ; OPT:       # %bb.0:
 ; OPT-NEXT:    or %s0, %s0, %s1
+; OPT-NEXT:    and %s0, 1, %s0
 ; OPT-NEXT:    b.l.t (, %s10)
   %3 = xor i1 %1, true
   %4 = and i1 %3, %0
diff --git a/llvm/test/CodeGen/VE/Scalar/min.ll b/llvm/test/CodeGen/VE/Scalar/min.ll
index da92ebafd05903..69d5ce48601f8f 100644
--- a/llvm/test/CodeGen/VE/Scalar/min.ll
+++ b/llvm/test/CodeGen/VE/Scalar/min.ll
@@ -278,6 +278,7 @@ define i32 @min2u32(i32, i32) {
 define zeroext i1 @mini1(i1 zeroext, i1 zeroext) {
 ; CHECK-LABEL: mini1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    and %s2, %s1, %s0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
@@ -285,6 +286,7 @@ define zeroext i1 @mini1(i1 zeroext, i1 zeroext) {
 ;
 ; OPT-LABEL: mini1:
 ; OPT:       # %bb.0:
+; OPT-NEXT:    and %s0, %s0, (32)0
 ; OPT-NEXT:    and %s2, %s1, %s0
 ; OPT-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; OPT-NEXT:    adds.w.zx %s0, %s2, (0)1
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index e53e194ba05c2a..ca79470d713493 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -909,9 +909,9 @@ define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) {
 define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x i32> %mask) {
 ; ALL-LABEL: ternlog_maskz_or_and_mask:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
-; ALL-NEXT:    vpsrad $31, %zmm2, %zmm0
-; ALL-NEXT:    vpternlogd $224, %zmm1, %zmm3, %zmm0
+; ALL-NEXT:    vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
+; ALL-NEXT:    vpsrad $31, %zmm2, %zmm1
+; ALL-NEXT:    vpandd %zmm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %m = icmp slt <16 x i32> %mask, zeroinitializer
   %a = and <16 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -923,9 +923,9 @@ define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
 define <8 x i64> @ternlog_maskz_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i64> %mask) {
 ; ALL-LABEL: ternlog_maskz_xor_and_mask:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm3
-; ALL-NEXT:    vpsraq $63, %zmm2, %zmm0
-; ALL-NEXT:    vpternlogq $96, %zmm1, %zmm3, %zmm0
+; ALL-NEXT:    vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; ALL-NEXT:    vpsraq $63, %zmm2, %zmm1
+; ALL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %m = icmp slt <8 x i64> %mask, zeroinitializer
   %a = and <8 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 86ebb1e40870f8..3826c9a32da20e 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -396,8 +396,8 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
 ; CHECK-LABEL: test20:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
-; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
+; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x76,0xcb]
+; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc9]
 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp eq <16 x i32> %x1, %y1
@@ -410,8 +410,8 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
 ; CHECK-LABEL: test21:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
-; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
+; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x48,0x1f,0xcb,0x05]
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc9,0x02]
 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <8 x i64> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
index 500a71b7dde277..c5cb858286e821 100644
--- a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -96,8 +96,8 @@ define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, ptr %y.ptr) nounwind {
 define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
 ; CHECK-LABEL: test9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <32 x i16> %x1, %y1
@@ -110,8 +110,8 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16
 define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
 ; CHECK-LABEL: test10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleb %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vpcmpnltb %zmm3, %zmm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpnltb %zmm3, %zmm2, %k1
+; CHECK-NEXT:    vpcmpleb %zmm1, %zmm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %zmm0, %zmm2, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <64 x i8> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
index ee750834907463..a2f4a2046a74b3 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -96,8 +96,8 @@ define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, ptr %y.ptr) nounwind
 define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
 ; CHECK-LABEL: test256_9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
-; CHECK-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <16 x i16> %x1, %y1
@@ -110,8 +110,8 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x
 define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
 ; CHECK-LABEL: test256_10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k1
-; CHECK-NEXT:    vpcmpnltb %ymm3, %ymm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpnltb %ymm3, %ymm2, %k1
+; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <32 x i8> %x1, %y1
@@ -246,8 +246,8 @@ define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, ptr %y.ptr) nounwind {
 define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
 ; CHECK-LABEL: test128_9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
-; CHECK-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <8 x i16> %x1, %y1
@@ -260,8 +260,8 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16>
 define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
 ; CHECK-LABEL: test128_10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k1
-; CHECK-NEXT:    vpcmpnltb %xmm3, %xmm2, %k1 {%k1}
+; CHECK-NEXT:    vpcmpnltb %xmm3, %xmm2, %k1
+; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <16 x i8> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 58621967e2aca6..e345be8f3fca00 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1079,9 +1079,9 @@ define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) {
 define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %mask) {
 ; CHECK-LABEL: ternlog_maskz_or_and_mask:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
-; CHECK-NEXT:    vpsrad $31, %xmm3, %xmm0
-; CHECK-NEXT:    vpternlogd $224, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT:    vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
+; CHECK-NEXT:    vpsrad $31, %xmm3, %xmm1
+; CHECK-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %m = icmp slt <4 x i32> %mask, zeroinitializer
   %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
@@ -1093,9 +1093,9 @@ define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32
 define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
 ; CHECK-LABEL: ternlog_maskz_or_and_mask_ymm:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm3
-; CHECK-NEXT:    vpsrad $31, %ymm2, %ymm0
-; CHECK-NEXT:    vpternlogd $224, %ymm1, %ymm3, %ymm0
+; CHECK-NEXT:    vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
+; CHECK-NEXT:    vpsrad $31, %ymm2, %ymm1
+; CHECK-NEXT:    vpand %ymm0, %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %m = icmp slt <8 x i32> %mask, zeroinitializer
   %a = and <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
@@ -1107,9 +1107,9 @@ define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x
 define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
 ; CHECK-LABEL: ternlog_maskz_xor_and_mask:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm3
-; CHECK-NEXT:    vpsraq $63, %xmm2, %xmm0
-; CHECK-NEXT:    vpternlogq $96, %xmm1, %xmm3, %xmm0
+; CHECK-NEXT:    vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
+; CHECK-NEXT:    vpsraq $63, %xmm2, %xmm1
+; CHECK-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %m = icmp slt <2 x i64> %mask, zeroinitializer
   %a = and <2 x i64> %x, <i64 1099511627775, i64 1099511627775>
@@ -1121,9 +1121,9 @@ define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6
 define <4 x i64> @ternlog_maskz_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
 ; CHECK-LABEL: ternlog_maskz_xor_and_mask_ymm:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm3
-; CHECK-NEXT:    vpsraq $63, %ymm2, %ymm0
-; CHECK-NEXT:    vpternlogq $96, %ymm1, %ymm3, %ymm0
+; CHECK-NEXT:    vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
+; CHECK-NEXT:    vpsraq $63, %ymm2, %ymm1
+; CHECK-NEXT:    vpand %ymm0, %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %m = icmp slt <4 x i64> %mask, zeroinitializer
   %a = and <4 x i64> %x, <i64 72057594037927935, i64 72057594037927935, i64 72057594037927935, i64 72057594037927935>
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index 5b09e45b6fcf17..12074c292512bb 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -264,8 +264,8 @@ define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, ptr %y.ptr) nounwind {
 define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind {
 ; VLX-LABEL: test256_9:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
-; VLX-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 {%k1}
+; VLX-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 {%k1}
 ; VLX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -275,8 +275,8 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -290,8 +290,8 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
 define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind {
 ; VLX-LABEL: test256_10:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
-; VLX-NEXT:    vpcmpnltq %ymm3, %ymm2, %k1 {%k1}
+; VLX-NEXT:    vpcmpnltq %ymm3, %ymm2, %k1
+; VLX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 {%k1}
 ; VLX-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -301,8 +301,8 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -326,9 +326,9 @@ define <4 x i64> @test256_11(<4 x i64> %x, ptr %y.ptr, <4 x i64> %x1, <4 x i64>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vmovdqu (%rdi), %ymm3
-; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
+; NoVLX-NEXT:    vmovdqu (%rdi), %ymm2
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -353,9 +353,9 @@ define <8 x i32> @test256_12(<8 x i32> %x, ptr %y.ptr, <8 x i32> %x1, <8 x i32>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vmovdqu (%rdi), %ymm3
-; NoVLX-NEXT:    vpcmpleud %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1
+; NoVLX-NEXT:    vmovdqu (%rdi), %ymm2
+; NoVLX-NEXT:    vpcmpleud %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -819,8 +819,8 @@ define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, ptr %y.ptr) nounwind {
 define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind {
 ; VLX-LABEL: test128_9:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
-; VLX-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 {%k1}
+; VLX-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 {%k1}
 ; VLX-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -830,8 +830,8 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -845,8 +845,8 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
 define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind {
 ; VLX-LABEL: test128_10:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpleq %xmm1, %xmm0, %k1
-; VLX-NEXT:    vpcmpnltq %xmm3, %xmm2, %k1 {%k1}
+; VLX-NEXT:    vpcmpnltq %xmm3, %xmm2, %k1
+; VLX-NEXT:    vpcmpleq %xmm1, %xmm0, %k1 {%k1}
 ; VLX-NEXT:    vpblendmq %xmm0, %xmm2, %xmm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -856,8 +856,8 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -881,9 +881,9 @@ define <2 x i64> @test128_11(<2 x i64> %x, ptr %y.ptr, <2 x i64> %x1, <2 x i64>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vmovdqu (%rdi), %xmm3
-; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
+; NoVLX-NEXT:    vmovdqu (%rdi), %xmm2
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -908,9 +908,9 @@ define <4 x i32> @test128_12(<4 x i32> %x, ptr %y.ptr, <4 x i32> %x1, <4 x i32>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vmovdqu (%rdi), %xmm3
-; NoVLX-NEXT:    vpcmpleud %zmm3, %zmm0, %k1
-; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1
+; NoVLX-NEXT:    vmovdqu (%rdi), %xmm2
+; NoVLX-NEXT:    vpcmpleud %zmm2, %zmm0, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index fa45afbb634c4d..1a0207cba9d806 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -178,13 +178,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $136, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    movl %ebp, %ecx
-; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %edx, %edi
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sete %bl
@@ -195,30 +195,27 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    orb %bl, %al
 ; X86-NEXT:    movb %al, (%esp) # 1-byte Spill
-; X86-NEXT:    bsrl %edi, %edx
+; X86-NEXT:    bsrl %esi, %edx
 ; X86-NEXT:    xorl $31, %edx
-; X86-NEXT:    bsrl %esi, %ecx
+; X86-NEXT:    bsrl %edi, %ecx
 ; X86-NEXT:    xorl $31, %ecx
 ; X86-NEXT:    addl $32, %ecx
-; X86-NEXT:    testl %edi, %edi
-; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    testl %esi, %esi
 ; X86-NEXT:    cmovnel %edx, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bsrl %eax, %edx
 ; X86-NEXT:    xorl $31, %edx
-; X86-NEXT:    bsrl %ebp, %ebp
-; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %esi, %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    xorl $31, %ebp
 ; X86-NEXT:    addl $32, %ebp
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    cmovnel %edx, %ebp
 ; X86-NEXT:    addl $64, %ebp
-; X86-NEXT:    movl %edi, %edx
-; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    orl %ebx, %edi
 ; X86-NEXT:    cmovnel %ecx, %ebp
 ; X86-NEXT:    bsrl %esi, %edx
-; X86-NEXT:    movl %esi, %ebx
 ; X86-NEXT:    xorl $31, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bsrl %eax, %ecx
@@ -235,15 +232,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    testl %edi, %edi
 ; X86-NEXT:    cmovnel %esi, %edx
 ; X86-NEXT:    addl $64, %edx
-; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    cmovnel %ecx, %edx
 ; X86-NEXT:    subl %edx, %ebp
 ; X86-NEXT:    movl $0, %edx
 ; X86-NEXT:    sbbl %edx, %edx
 ; X86-NEXT:    movl $0, %esi
 ; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    movl $0, %edi
-; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %ebx, %ebx
 ; X86-NEXT:    movl $127, %ecx
 ; X86-NEXT:    cmpl %ebp, %ecx
 ; X86-NEXT:    movl $0, %ecx
@@ -251,7 +249,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    movl $0, %ecx
 ; X86-NEXT:    sbbl %esi, %ecx
 ; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    sbbl %edi, %ecx
+; X86-NEXT:    sbbl %ebx, %ecx
 ; X86-NEXT:    setb %cl
 ; X86-NEXT:    orb (%esp), %cl # 1-byte Folded Reload
 ; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -260,18 +258,19 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    orl %esi, %eax
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %edx
 ; X86-NEXT:    orl %eax, %edx
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    testb %cl, %cl
-; X86-NEXT:    movl %ebx, %edx
-; X86-NEXT:    movl $0, %edi
-; X86-NEXT:    cmovnel %edi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    cmovnel %ebx, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    cmovnel %edi, %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    cmovnel %edi, %ebp
+; X86-NEXT:    cmovnel %ebx, %esi
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    cmovnel %ebx, %ebp
+; X86-NEXT:    movl $0, %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    cmovnel %edi, %ebx
 ; X86-NEXT:    orb %cl, %al
diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll
index 4953d004e65c57..82c43f0d985be2 100644
--- a/llvm/test/CodeGen/X86/fcmp-logic.ll
+++ b/llvm/test/CodeGen/X86/fcmp-logic.ll
@@ -371,8 +371,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    xorps %xmm4, %xmm4
 ; SSE2-NEXT:    xorps %xmm5, %xmm5
-; SSE2-NEXT:    cmpltps %xmm1, %xmm5
-; SSE2-NEXT:    cmpltps %xmm0, %xmm4
+; SSE2-NEXT:    cmpltps %xmm0, %xmm5
+; SSE2-NEXT:    cmpltps %xmm1, %xmm4
 ; SSE2-NEXT:    orps %xmm5, %xmm4
 ; SSE2-NEXT:    movd %xmm4, %ecx
 ; SSE2-NEXT:    ucomiss %xmm2, %xmm3
@@ -383,8 +383,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
 ; AVX1-LABEL: f32cmp3:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vcmpltps %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vcmpltps %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vcmpltps %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %ecx
 ; AVX1-NEXT:    vucomiss %xmm2, %xmm3
@@ -397,9 +397,9 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
 ; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vxorps %xmm4, %xmm4, %xmm4
-; AVX512-NEXT:    vcmpltps %zmm1, %zmm4, %k0
-; AVX512-NEXT:    vcmpltps %zmm0, %zmm4, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k0
+; AVX512-NEXT:    vcmpltps %zmm0, %zmm4, %k0
+; AVX512-NEXT:    vcmpltps %zmm1, %zmm4, %k1
+; AVX512-NEXT:    korw %k1, %k0, %k0
 ; AVX512-NEXT:    kmovw %k0, %ecx
 ; AVX512-NEXT:    vucomiss %xmm2, %xmm3
 ; AVX512-NEXT:    seta %al
diff --git a/llvm/test/CodeGen/X86/fold-select.ll b/llvm/test/CodeGen/X86/fold-select.ll
index 31afe979a33b35..fff7136219714d 100644
--- a/llvm/test/CodeGen/X86/fold-select.ll
+++ b/llvm/test/CodeGen/X86/fold-select.ll
@@ -59,11 +59,11 @@ define <8 x float> @select_or_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, <8 x f
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsllw $15, %xmm2, %xmm2
 ; CHECK-NEXT:    vpmovw2m %xmm2, %k0
-; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
-; CHECK-NEXT:    vpmovw2m %xmm1, %k1
 ; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovw2m %xmm0, %k1
+; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm0
 ; CHECK-NEXT:    vpmovw2m %xmm0, %k2
-; CHECK-NEXT:    kandnb %k1, %k2, %k1
+; CHECK-NEXT:    kandnb %k2, %k1, %k1
 ; CHECK-NEXT:    korb %k1, %k0, %k1
 ; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; CHECK-NEXT:    vmovaps %ymm3, %ymm0 {%k1}
@@ -98,9 +98,9 @@ define <8 x float> @select_or_v8i1_2(i8 %m1, i8 %m2, i8 %m3, <8 x float> %d) {
 define <8 x float> @select_or_v8i1_3(<8 x i16> %m1, <8 x i16> %m2, <8 x i16> %m3, <8 x float> %d) {
 ; CHECK-LABEL: select_or_v8i1_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm0, %k1
 ; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm1, %k0
-; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k1 {%k1}
 ; CHECK-NEXT:    korb %k1, %k0, %k1
 ; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; CHECK-NEXT:    vmovaps %ymm3, %ymm0 {%k1}
diff --git a/llvm/test/CodeGen/X86/pr64589.ll b/llvm/test/CodeGen/X86/pr64589.ll
index 130ef517ae28eb..d93d54f4c31d0b 100644
--- a/llvm/test/CodeGen/X86/pr64589.ll
+++ b/llvm/test/CodeGen/X86/pr64589.ll
@@ -7,8 +7,8 @@
 define i8 @test(ptr %p) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl (%rdi), %eax
-; CHECK-NEXT:    orb 1(%rdi), %al
+; CHECK-NEXT:    movzbl 1(%rdi), %eax
+; CHECK-NEXT:    orb (%rdi), %al
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    addb %al, %al
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index ec7dca4285a355..fe17e415dbeb4b 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1541,8 +1541,9 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,1,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    psllq $63, %xmm0
 ; SSE2-NEXT:    movmskpd %xmm0, %eax
 ; SSE2-NEXT:    cmpl $3, %eax
 ; SSE2-NEXT:    sete %al
@@ -1550,26 +1551,42 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ;
 ; SSE42-LABEL: select_v2i8:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    pxor %xmm0, %xmm1
-; SSE42-NEXT:    ptest %xmm1, %xmm1
+; SSE42-NEXT:    movzwl (%rdi), %eax
+; SSE42-NEXT:    movd %eax, %xmm0
+; SSE42-NEXT:    movzwl (%rsi), %eax
+; SSE42-NEXT:    movd %eax, %xmm1
+; SSE42-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE42-NEXT:    psllq $63, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %eax
+; SSE42-NEXT:    cmpl $3, %eax
 ; SSE42-NEXT:    sete %al
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: select_v2i8:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vptest %xmm0, %xmm0
-; AVX1OR2-NEXT:    sete %al
+; AVX1OR2-NEXT:    movzwl (%rdi), %eax
+; AVX1OR2-NEXT:    vmovd %eax, %xmm0
+; AVX1OR2-NEXT:    movzwl (%rsi), %eax
+; AVX1OR2-NEXT:    vmovd %eax, %xmm1
+; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1OR2-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vtestpd %xmm1, %xmm0
+; AVX1OR2-NEXT:    setb %al
 ; AVX1OR2-NEXT:    retq
 ;
 ; AVX512-LABEL: select_v2i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movzwl (%rdi), %eax
-; AVX512-NEXT:    cmpw (%rsi), %ax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    movzwl (%rsi), %eax
+; AVX512-NEXT:    vmovd %eax, %xmm1
+; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512-NEXT:    knotw %k0, %k0
+; AVX512-NEXT:    kmovd %k0, %eax
+; AVX512-NEXT:    testb $3, %al
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    retq
   %v0 = load <2 x i8>, ptr %s0, align 1
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 951bcfa8fc1b74..4f91eb2cb0a5a1 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -1424,8 +1424,9 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,1,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    psllq $63, %xmm0
 ; SSE2-NEXT:    movmskpd %xmm0, %eax
 ; SSE2-NEXT:    testl %eax, %eax
 ; SSE2-NEXT:    setne %al
@@ -1433,19 +1434,27 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ;
 ; SSE42-LABEL: select_v2i8:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    pcmpeqq %xmm0, %xmm1
-; SSE42-NEXT:    movmskpd %xmm1, %eax
+; SSE42-NEXT:    movzwl (%rdi), %eax
+; SSE42-NEXT:    movd %eax, %xmm0
+; SSE42-NEXT:    movzwl (%rsi), %eax
+; SSE42-NEXT:    movd %eax, %xmm1
+; SSE42-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE42-NEXT:    psllq $63, %xmm0
+; SSE42-NEXT:    movmskpd %xmm0, %eax
 ; SSE42-NEXT:    testl %eax, %eax
 ; SSE42-NEXT:    setne %al
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: select_v2i8:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    movzwl (%rdi), %eax
+; AVX1OR2-NEXT:    vmovd %eax, %xmm0
+; AVX1OR2-NEXT:    movzwl (%rsi), %eax
+; AVX1OR2-NEXT:    vmovd %eax, %xmm1
+; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1OR2-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; AVX1OR2-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq

From 568e0a6d0f9cf8706280544e2d6d6292ab9adb48 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Fri, 5 Apr 2024 10:57:08 +0200
Subject: [PATCH 3/3] [DAGCombiner] Push freeze through SETCC and SELECT_CC

Allow pushing freeze through SETCC and SELECT_CC even if there are
multiple "maybe poison" operands. In the past this was limited to a
single "maybe poison" operand, but it seems profitable to allow the
multiple-operand scenario as well.

One goal here is to avoid some regressions seen in review of
  https://github.com/llvm/llvm-project/pull/84924
when solving the select->and miscompiles described in
  https://github.com/llvm/llvm-project/issues/84653
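
As a rough sketch of what the combine now permits (a hypothetical
example for illustration, not one of the tests in this patch): given
IR such as

  ; Hypothetical illustration only, not a test from this patch.
  define i1 @freeze_of_cmp(i32 %a, i32 %b) {
    %cmp = icmp slt i32 %a, %b   ; both operands may be poison
    %fr = freeze i1 %cmp
    ret i1 %fr
  }

the freeze of the SETCC node can now be pushed onto both operands,
roughly producing setcc (freeze %a), (freeze %b), slt, even though
both %a and %b are "maybe poison". Previously the fold bailed out as
soon as more than one operand could be poison.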
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   4 +-
 llvm/test/CodeGen/AArch64/cmp-chains.ll       | 188 +++---
 .../CodeGen/AArch64/select-with-and-or.ll     |  26 +-
 llvm/test/CodeGen/PowerPC/pr40922.ll          |   7 +-
 llvm/test/CodeGen/RISCV/double-convert.ll     |  58 +-
 .../CodeGen/RISCV/double-round-conv-sat.ll    | 540 +++++++++---------
 llvm/test/CodeGen/RISCV/pr84653_pr85190.ll    |  29 +-
 llvm/test/CodeGen/X86/avx512-vec-cmp.ll       |   8 +-
 llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll     |   8 +-
 llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll   |  16 +-
 llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll     |  56 +-
 llvm/test/CodeGen/X86/fold-select.ll          |   4 +-
 .../test/CodeGen/X86/setcc-non-simple-type.ll |  78 +--
 .../test/CodeGen/X86/vector-compare-all_of.ll |  11 +-
 .../test/CodeGen/X86/vector-compare-any_of.ll |  11 +-
 15 files changed, 555 insertions(+), 489 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 50046d380e2d80..8daa74d801b5e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15442,7 +15442,9 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
       N0->getNumValues() != 1 || !N0->hasOneUse())
     return SDValue();
 
-  bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
+  bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::SELECT_CC ||
+                                          N0.getOpcode() == ISD::SETCC ||
+                                          N0.getOpcode() == ISD::BUILD_VECTOR ||
                                           N0.getOpcode() == ISD::BUILD_PAIR ||
                                           N0.getOpcode() == ISD::CONCAT_VECTORS;
 
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 8cb525f14cc813..1d9f39e5185939 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -6,14 +6,21 @@
 
 ; (x0 < x1) && (x2 > x3)
 define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; CHECK-LABEL: cmp_and2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, hi
-; CHECK-NEXT:    and w0, w8, w9
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_and2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w1
+; SDISEL-NEXT:    ccmp w2, w3, #0, lo
+; SDISEL-NEXT:    cset w0, hi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_and2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w9, hi
+; GISEL-NEXT:    and w0, w8, w9
+; GISEL-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ugt i32 %2, %3
   %7 = select i1 %5, i1 %6, i1 false
@@ -23,17 +30,25 @@ define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
 
 ; (x0 < x1) && (x2 > x3) && (x4 != x5)
 define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; CHECK-LABEL: cmp_and3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, hi
-; CHECK-NEXT:    cmp w4, w5
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    and w0, w8, w9
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_and3:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w1
+; SDISEL-NEXT:    ccmp w2, w3, #0, lo
+; SDISEL-NEXT:    ccmp w4, w5, #4, hi
+; SDISEL-NEXT:    cset w0, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_and3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w9, hi
+; GISEL-NEXT:    cmp w4, w5
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    cset w9, ne
+; GISEL-NEXT:    and w0, w8, w9
+; GISEL-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
   %9 = select i1 %7, i1 %8, i1 false
@@ -45,20 +60,29 @@ define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 
 ; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
 define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; CHECK-LABEL: cmp_and4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    cmp w4, w5
-; CHECK-NEXT:    cset w10, ne
-; CHECK-NEXT:    cmp w6, w7
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    cset w11, eq
-; CHECK-NEXT:    and w9, w10, w11
-; CHECK-NEXT:    and w0, w8, w9
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_and4:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w2, w3
+; SDISEL-NEXT:    ccmp w0, w1, #2, hi
+; SDISEL-NEXT:    ccmp w4, w5, #4, lo
+; SDISEL-NEXT:    ccmp w6, w7, #0, ne
+; SDISEL-NEXT:    cset w0, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_and4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w8, hi
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w9, lo
+; GISEL-NEXT:    cmp w4, w5
+; GISEL-NEXT:    cset w10, ne
+; GISEL-NEXT:    cmp w6, w7
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    cset w11, eq
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w0, w8, w9
+; GISEL-NEXT:    ret
   %9 = icmp ugt i32 %2, %3
   %10 = icmp ult i32 %0, %1
   %11 = select i1 %9, i1 %10, i1 false
@@ -72,15 +96,22 @@ define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 
 ; (x0 < x1) || (x2 > x3)
 define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; CHECK-LABEL: cmp_or2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_or2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w1
+; SDISEL-NEXT:    ccmp w2, w3, #0, hs
+; SDISEL-NEXT:    cset w0, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_or2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w9, ne
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ne i32 %2, %3
   %7 = select i1 %5, i1 true, i1 %6
@@ -90,18 +121,26 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
 
 ; (x0 < x1) || (x2 > x3) || (x4 != x5)
 define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; CHECK-LABEL: cmp_or3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, hi
-; CHECK-NEXT:    cmp w4, w5
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_or3:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w1
+; SDISEL-NEXT:    ccmp w2, w3, #2, hs
+; SDISEL-NEXT:    ccmp w4, w5, #0, ls
+; SDISEL-NEXT:    cset w0, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_or3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w9, hi
+; GISEL-NEXT:    cmp w4, w5
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    cset w9, ne
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
   %9 = select i1 %7, i1 true, i1 %8
@@ -113,21 +152,30 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 
 ; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
 define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; CHECK-LABEL: cmp_or4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, hi
-; CHECK-NEXT:    cmp w4, w5
-; CHECK-NEXT:    cset w10, ne
-; CHECK-NEXT:    cmp w6, w7
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    cset w11, eq
-; CHECK-NEXT:    orr w9, w10, w11
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    and w0, w8, #0x1
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: cmp_or4:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w1
+; SDISEL-NEXT:    ccmp w2, w3, #2, hs
+; SDISEL-NEXT:    ccmp w4, w5, #0, ls
+; SDISEL-NEXT:    ccmp w6, w7, #4, eq
+; SDISEL-NEXT:    cset w0, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_or4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    cset w8, lo
+; GISEL-NEXT:    cmp w2, w3
+; GISEL-NEXT:    cset w9, hi
+; GISEL-NEXT:    cmp w4, w5
+; GISEL-NEXT:    cset w10, ne
+; GISEL-NEXT:    cmp w6, w7
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    cset w11, eq
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    ret
   %9 = icmp ult i32 %0, %1
   %10 = icmp ugt i32 %2, %3
   %11 = select i1 %9, i1 true, i1 %10
@@ -194,3 +242,5 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
   %9 = zext i1 %8 to i32
   ret i32 %9
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index 1fdb9d34bf1024..84b6818eaa739c 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -5,10 +5,8 @@ define i1 @and(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: and:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, gt
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ccmp w2, w3, #4, eq
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -20,11 +18,8 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: or:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, gt
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ccmp w2, w3, #0, ne
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -36,10 +31,8 @@ define i1 @and_not(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: and_not:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, ne
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, gt
-; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ccmp w2, w3, #4, ne
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
@@ -51,11 +44,8 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: or_not:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, ne
-; CHECK-NEXT:    cmp w2, w3
-; CHECK-NEXT:    cset w9, gt
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ccmp w2, w3, #0, eq
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
   %a = icmp eq i32 %x, %y
   %b = icmp sgt i32 %z, %w
diff --git a/llvm/test/CodeGen/PowerPC/pr40922.ll b/llvm/test/CodeGen/PowerPC/pr40922.ll
index 2d9add6a198579..9252e9a3e3aa4f 100644
--- a/llvm/test/CodeGen/PowerPC/pr40922.ll
+++ b/llvm/test/CodeGen/PowerPC/pr40922.ll
@@ -23,12 +23,11 @@ define i32 @a() {
 ; CHECK-NEXT:    li 5, 0
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    addic 6, 4, 6
-; CHECK-NEXT:    rlwinm 6, 6, 0, 28, 26
 ; CHECK-NEXT:    addze 5, 5
-; CHECK-NEXT:    cmplw 1, 6, 4
+; CHECK-NEXT:    rlwinm 6, 6, 0, 28, 26
 ; CHECK-NEXT:    andi. 5, 5, 1
-; CHECK-NEXT:    crnot 20, 4
-; CHECK-NEXT:    cror 20, 1, 20
+; CHECK-NEXT:    cmplw 1, 6, 4
+; CHECK-NEXT:    crorc 20, 1, 4
 ; CHECK-NEXT:    bc 12, 20, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl e
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index da882cafd99715..e6e64a506fab6e 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -692,28 +692,27 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
 ; RV32IFD-NEXT:    fle.d s0, fa5, fa0
 ; RV32IFD-NEXT:    call __fixdfdi
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB12_2
+; RV32IFD-NEXT:    beqz s0, .LBB12_2
 ; RV32IFD-NEXT:  # %bb.1: # %start
 ; RV32IFD-NEXT:    mv a2, a1
 ; RV32IFD-NEXT:  .LBB12_2: # %start
 ; RV32IFD-NEXT:    lui a1, %hi(.LCPI12_1)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI12_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB12_4
+; RV32IFD-NEXT:    flt.d a3, fa5, fs0
+; RV32IFD-NEXT:    beqz a3, .LBB12_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a2, a4, -1
 ; RV32IFD-NEXT:  .LBB12_4: # %start
 ; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    neg a4, a1
+; RV32IFD-NEXT:    and a1, a4, a2
+; RV32IFD-NEXT:    neg a2, a3
+; RV32IFD-NEXT:    neg a3, s0
 ; RV32IFD-NEXT:    and a0, a3, a0
+; RV32IFD-NEXT:    or a0, a2, a0
+; RV32IFD-NEXT:    and a0, a4, a0
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
@@ -735,39 +734,40 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    mv s1, a1
-; RV32IZFINXZDINX-NEXT:    mv s0, a0
-; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI12_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI12_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI12_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB12_2
+; RV32IZFINXZDINX-NEXT:    mv s1, a1
+; RV32IZFINXZDINX-NEXT:    mv s0, a0
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    lui a2, 524288
+; RV32IZFINXZDINX-NEXT:    beqz s2, .LBB12_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1: # %start
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a2, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB12_2: # %start
 ; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI12_1)
 ; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI12_1)(a1)
 ; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI12_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB12_4
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB12_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB12_4: # %start
 ; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    neg a4, a1
+; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
+; RV32IZFINXZDINX-NEXT:    neg a2, s2
 ; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
 ; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index f1c56b320b76c4..64ff813ccf9daa 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -43,42 +43,47 @@ define signext i32 @test_floor_si32(double %x) {
 define i64 @test_floor_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_floor_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call floor
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI1_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB1_2
+; RV32IFD-NEXT:    beqz s2, .LBB1_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB1_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI1_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB1_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB1_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB1_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_floor_si64:
@@ -103,30 +108,29 @@ define i64 @test_floor_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI1_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI1_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI1_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI1_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI1_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB1_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB1_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB1_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI1_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI1_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI1_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB1_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB1_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB1_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -289,42 +293,47 @@ define signext i32 @test_ceil_si32(double %x) {
 define i64 @test_ceil_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_ceil_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call ceil
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI5_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB5_2
+; RV32IFD-NEXT:    beqz s2, .LBB5_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB5_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI5_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB5_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB5_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB5_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_ceil_si64:
@@ -349,30 +358,29 @@ define i64 @test_ceil_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI5_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI5_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI5_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI5_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI5_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI5_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB5_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB5_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB5_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI5_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI5_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI5_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB5_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB5_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB5_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -535,42 +543,47 @@ define signext i32 @test_trunc_si32(double %x) {
 define i64 @test_trunc_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_trunc_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call trunc
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI9_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB9_2
+; RV32IFD-NEXT:    beqz s2, .LBB9_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB9_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB9_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB9_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB9_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_trunc_si64:
@@ -595,30 +608,29 @@ define i64 @test_trunc_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI9_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI9_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI9_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI9_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI9_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI9_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB9_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB9_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB9_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI9_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI9_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB9_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB9_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB9_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -781,42 +793,47 @@ define signext i32 @test_round_si32(double %x) {
 define i64 @test_round_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_round_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call round
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI13_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB13_2
+; RV32IFD-NEXT:    beqz s2, .LBB13_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB13_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI13_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB13_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB13_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB13_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_round_si64:
@@ -841,30 +858,29 @@ define i64 @test_round_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI13_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI13_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI13_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI13_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI13_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI13_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB13_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB13_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB13_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI13_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI13_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI13_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB13_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB13_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB13_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -1027,42 +1043,47 @@ define signext i32 @test_roundeven_si32(double %x) {
 define i64 @test_roundeven_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_roundeven_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call roundeven
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI17_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB17_2
+; RV32IFD-NEXT:    beqz s2, .LBB17_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB17_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI17_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB17_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB17_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB17_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_roundeven_si64:
@@ -1087,30 +1108,29 @@ define i64 @test_roundeven_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI17_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI17_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI17_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI17_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI17_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI17_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB17_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB17_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB17_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI17_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI17_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI17_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB17_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB17_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB17_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
@@ -1273,42 +1293,47 @@ define signext i32 @test_rint_si32(double %x) {
 define i64 @test_rint_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_rint_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call rint
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI21_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI21_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI21_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI21_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
+; RV32IFD-NEXT:    lui a4, 524288
 ; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    li a4, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    bne s0, a4, .LBB21_2
+; RV32IFD-NEXT:    beqz s2, .LBB21_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB21_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI21_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI21_1)(a1)
-; RV32IFD-NEXT:    flt.d a4, fa5, fs0
-; RV32IFD-NEXT:    beqz a4, .LBB21_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB21_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a3, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB21_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a3, a1
-; RV32IFD-NEXT:    and a1, a3, a2
-; RV32IFD-NEXT:    neg a2, a4
-; RV32IFD-NEXT:    neg a4, s0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_rint_si64:
@@ -1333,30 +1358,29 @@ define i64 @test_rint_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI21_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI21_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI21_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI21_1)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI21_1+4)(a4)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI21_1)(a4)
+; RV32IZFINXZDINX-NEXT:    fle.d a6, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a6
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    li a4, 1
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    bne a2, a4, .LBB21_2
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    beqz a6, .LBB21_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB21_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI21_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI21_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI21_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB21_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB21_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB21_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
index a80379eab6100f..f9b9c8a69d431c 100644
--- a/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
+++ b/llvm/test/CodeGen/RISCV/pr84653_pr85190.ll
@@ -20,13 +20,12 @@ define i1 @pr84653(i32 %x) {
 ; CHECK-ZBB-LABEL: pr84653:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    sext.w a1, a0
-; CHECK-ZBB-NEXT:    sgtz a2, a1
-; CHECK-ZBB-NEXT:    lui a3, 524288
-; CHECK-ZBB-NEXT:    addi a3, a3, -1
-; CHECK-ZBB-NEXT:    xor a0, a0, a3
+; CHECK-ZBB-NEXT:    lui a2, 524288
+; CHECK-ZBB-NEXT:    addi a2, a2, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a2
 ; CHECK-ZBB-NEXT:    sext.w a0, a0
+; CHECK-ZBB-NEXT:    max a0, a0, zero
 ; CHECK-ZBB-NEXT:    slt a0, a0, a1
-; CHECK-ZBB-NEXT:    and a0, a2, a0
 ; CHECK-ZBB-NEXT:    ret
   %cmp1 = icmp sgt i32 %x, 0
   %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
@@ -52,12 +51,11 @@ define i1 @pr85190(i64 %a) {
 ; CHECK-ZBB-LABEL: pr85190:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    ori a1, a0, 7
-; CHECK-ZBB-NEXT:    slti a2, a0, 0
-; CHECK-ZBB-NEXT:    li a3, -1
-; CHECK-ZBB-NEXT:    slli a3, a3, 63
-; CHECK-ZBB-NEXT:    sub a3, a3, a1
-; CHECK-ZBB-NEXT:    slt a0, a0, a3
-; CHECK-ZBB-NEXT:    and a0, a2, a0
+; CHECK-ZBB-NEXT:    li a2, -1
+; CHECK-ZBB-NEXT:    slli a2, a2, 63
+; CHECK-ZBB-NEXT:    sub a2, a2, a1
+; CHECK-ZBB-NEXT:    min a1, a2, zero
+; CHECK-ZBB-NEXT:    slt a0, a0, a1
 ; CHECK-ZBB-NEXT:    ret
   %or = or i64 %a, 7
   %cmp1 = icmp slt i64 %a, 0
@@ -83,13 +81,12 @@ define i1 @select_to_or(i32 %x) {
 ; CHECK-ZBB-LABEL: select_to_or:
 ; CHECK-ZBB:       # %bb.0:
 ; CHECK-ZBB-NEXT:    sext.w a1, a0
-; CHECK-ZBB-NEXT:    sgtz a2, a1
-; CHECK-ZBB-NEXT:    lui a3, 524288
-; CHECK-ZBB-NEXT:    addi a3, a3, -1
-; CHECK-ZBB-NEXT:    xor a0, a0, a3
+; CHECK-ZBB-NEXT:    lui a2, 524288
+; CHECK-ZBB-NEXT:    addi a2, a2, -1
+; CHECK-ZBB-NEXT:    xor a0, a0, a2
 ; CHECK-ZBB-NEXT:    sext.w a0, a0
+; CHECK-ZBB-NEXT:    min a0, a0, zero
 ; CHECK-ZBB-NEXT:    slt a0, a0, a1
-; CHECK-ZBB-NEXT:    or a0, a2, a0
 ; CHECK-ZBB-NEXT:    ret
   %cmp1 = icmp sgt i32 %x, 0
   %sub = sub nsw i32 2147483647, %x  ; 0x7fffffff
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 3826c9a32da20e..86ebb1e40870f8 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -396,8 +396,8 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, ptr %y.ptr) nounwind {
 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
 ; CHECK-LABEL: test20:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x76,0xcb]
-; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc9]
+; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
+; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp eq <16 x i32> %x1, %y1
@@ -410,8 +410,8 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
 ; CHECK-LABEL: test21:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x48,0x1f,0xcb,0x05]
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc9,0x02]
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask1 = icmp sge <8 x i64> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
index c5cb858286e821..500a71b7dde277 100644
--- a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -96,8 +96,8 @@ define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, ptr %y.ptr) nounwind {
 define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
 ; CHECK-LABEL: test9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
-; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <32 x i16> %x1, %y1
@@ -110,8 +110,8 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16
 define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
 ; CHECK-LABEL: test10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltb %zmm3, %zmm2, %k1
-; CHECK-NEXT:    vpcmpleb %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpleb %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpnltb %zmm3, %zmm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %zmm0, %zmm2, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <64 x i8> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
index a2f4a2046a74b3..ee750834907463 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -96,8 +96,8 @@ define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, ptr %y.ptr) nounwind
 define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
 ; CHECK-LABEL: test256_9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
-; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <16 x i16> %x1, %y1
@@ -110,8 +110,8 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x
 define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
 ; CHECK-LABEL: test256_10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltb %ymm3, %ymm2, %k1
-; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vpcmpnltb %ymm3, %ymm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <32 x i8> %x1, %y1
@@ -246,8 +246,8 @@ define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, ptr %y.ptr) nounwind {
 define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
 ; CHECK-LABEL: test128_9:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
-; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp eq <8 x i16> %x1, %y1
@@ -260,8 +260,8 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16>
 define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
 ; CHECK-LABEL: test128_10:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpnltb %xmm3, %xmm2, %k1
-; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vpcmpnltb %xmm3, %xmm2, %k1 {%k1}
 ; CHECK-NEXT:    vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <16 x i8> %x1, %y1
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index 12074c292512bb..5b09e45b6fcf17 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -264,8 +264,8 @@ define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, ptr %y.ptr) nounwind {
 define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind {
 ; VLX-LABEL: test256_9:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
-; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 {%k1}
+; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
+; VLX-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 {%k1}
 ; VLX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -275,8 +275,8 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
-; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -290,8 +290,8 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
 define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind {
 ; VLX-LABEL: test256_10:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpnltq %ymm3, %ymm2, %k1
-; VLX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 {%k1}
+; VLX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
+; VLX-NEXT:    vpcmpnltq %ymm3, %ymm2, %k1 {%k1}
 ; VLX-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -301,8 +301,8 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -326,9 +326,9 @@ define <4 x i64> @test256_11(<4 x i64> %x, ptr %y.ptr, <4 x i64> %x1, <4 x i64>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
-; NoVLX-NEXT:    vmovdqu (%rdi), %ymm2
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vmovdqu (%rdi), %ymm3
+; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -353,9 +353,9 @@ define <8 x i32> @test256_12(<8 x i32> %x, ptr %y.ptr, <8 x i32> %x1, <8 x i32>
 ; NoVLX-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1
-; NoVLX-NEXT:    vmovdqu (%rdi), %ymm2
-; NoVLX-NEXT:    vpcmpleud %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vmovdqu (%rdi), %ymm3
+; NoVLX-NEXT:    vpcmpleud %zmm3, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -819,8 +819,8 @@ define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, ptr %y.ptr) nounwind {
 define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind {
 ; VLX-LABEL: test128_9:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
-; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 {%k1}
+; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; VLX-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 {%k1}
 ; VLX-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -830,8 +830,8 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
-; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -845,8 +845,8 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
 define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind {
 ; VLX-LABEL: test128_10:
 ; VLX:       # %bb.0:
-; VLX-NEXT:    vpcmpnltq %xmm3, %xmm2, %k1
-; VLX-NEXT:    vpcmpleq %xmm1, %xmm0, %k1 {%k1}
+; VLX-NEXT:    vpcmpleq %xmm1, %xmm0, %k1
+; VLX-NEXT:    vpcmpnltq %xmm3, %xmm2, %k1 {%k1}
 ; VLX-NEXT:    vpblendmq %xmm0, %xmm2, %xmm0 {%k1}
 ; VLX-NEXT:    retq
 ;
@@ -856,8 +856,8 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1
-; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -881,9 +881,9 @@ define <2 x i64> @test128_11(<2 x i64> %x, ptr %y.ptr, <2 x i64> %x1, <2 x i64>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
-; NoVLX-NEXT:    vmovdqu (%rdi), %xmm2
-; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vmovdqu (%rdi), %xmm3
+; NoVLX-NEXT:    vpcmpgtq %zmm3, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
@@ -908,9 +908,9 @@ define <4 x i32> @test128_12(<4 x i32> %x, ptr %y.ptr, <4 x i32> %x1, <4 x i32>
 ; NoVLX-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1
-; NoVLX-NEXT:    vmovdqu (%rdi), %xmm2
-; NoVLX-NEXT:    vpcmpleud %zmm2, %zmm0, %k1 {%k1}
+; NoVLX-NEXT:    vmovdqu (%rdi), %xmm3
+; NoVLX-NEXT:    vpcmpleud %zmm3, %zmm0, %k1
+; NoVLX-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; NoVLX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/fold-select.ll b/llvm/test/CodeGen/X86/fold-select.ll
index fff7136219714d..73268d9c1d5f45 100644
--- a/llvm/test/CodeGen/X86/fold-select.ll
+++ b/llvm/test/CodeGen/X86/fold-select.ll
@@ -98,9 +98,9 @@ define <8 x float> @select_or_v8i1_2(i8 %m1, i8 %m2, i8 %m3, <8 x float> %d) {
 define <8 x float> @select_or_v8i1_3(<8 x i16> %m1, <8 x i16> %m2, <8 x i16> %m3, <8 x float> %d) {
 ; CHECK-LABEL: select_or_v8i1_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm0, %k1
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k1
 ; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm1, %k0
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k1 {%k1}
+; CHECK-NEXT:    vpcmpeqw %xmm2, %xmm0, %k1 {%k1}
 ; CHECK-NEXT:    korb %k1, %k0, %k1
 ; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; CHECK-NEXT:    vmovaps %ymm3, %ymm0 {%k1}
diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
index 97c3c2040b2914..a80d8d8cd01b85 100644
--- a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
+++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -46,7 +46,6 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-NEXT:    movq 24(%rsi), %rcx
 ; CHECK-NEXT:    movq 32(%rsi), %rdx
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
-; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [1,1]
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -54,39 +53,45 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-NEXT:    xorpd %xmm3, %xmm3
-; CHECK-NEXT:    movq $-1024, %rdi # imm = 0xFC00
+; CHECK-NEXT:    movq $-1024, %rsi # imm = 0xFC00
 ; CHECK-NEXT:    movdqa %xmm0, %xmm4
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_2: # %vector.body
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    cmpq 1024(%rdx,%rdi), %rsi
-; CHECK-NEXT:    movq %rcx, %r8
-; CHECK-NEXT:    sbbq 1032(%rdx,%rdi), %r8
-; CHECK-NEXT:    setge %r8b
-; CHECK-NEXT:    movzbl %r8b, %r8d
-; CHECK-NEXT:    andl $1, %r8d
+; CHECK-NEXT:    movdqu 1024(%rdx,%rsi), %xmm5
+; CHECK-NEXT:    movdqu 1040(%rdx,%rsi), %xmm6
+; CHECK-NEXT:    movq %xmm5, %rdi
+; CHECK-NEXT:    movq %xmm6, %r8
+; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
+; CHECK-NEXT:    movq %xmm5, %r9
+; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
+; CHECK-NEXT:    movq %xmm5, %r10
 ; CHECK-NEXT:    negq %r8
-; CHECK-NEXT:    movq %r8, %xmm5
-; CHECK-NEXT:    cmpq 1040(%rdx,%rdi), %rsi
 ; CHECK-NEXT:    movq %rcx, %r8
-; CHECK-NEXT:    sbbq 1048(%rdx,%rdi), %r8
+; CHECK-NEXT:    sbbq %r10, %r8
 ; CHECK-NEXT:    setge %r8b
 ; CHECK-NEXT:    movzbl %r8b, %r8d
-; CHECK-NEXT:    andl $1, %r8d
 ; CHECK-NEXT:    negq %r8
-; CHECK-NEXT:    movq %r8, %xmm6
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
-; CHECK-NEXT:    movdqa %xmm1, %xmm6
-; CHECK-NEXT:    psllq %xmm4, %xmm6
+; CHECK-NEXT:    movq %r8, %xmm5
+; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    movq %rcx, %rdi
+; CHECK-NEXT:    sbbq %r9, %rdi
+; CHECK-NEXT:    setge %dil
+; CHECK-NEXT:    movzbl %dil, %edi
+; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    movq %rdi, %xmm6
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
+; CHECK-NEXT:    movdqa %xmm1, %xmm5
+; CHECK-NEXT:    psllq %xmm4, %xmm5
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
 ; CHECK-NEXT:    movdqa %xmm1, %xmm8
 ; CHECK-NEXT:    psllq %xmm7, %xmm8
-; CHECK-NEXT:    movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1]
-; CHECK-NEXT:    andpd %xmm5, %xmm8
+; CHECK-NEXT:    movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
+; CHECK-NEXT:    andpd %xmm6, %xmm8
 ; CHECK-NEXT:    orpd %xmm8, %xmm3
 ; CHECK-NEXT:    paddq %xmm2, %xmm4
-; CHECK-NEXT:    addq $32, %rdi
+; CHECK-NEXT:    addq $32, %rsi
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.3: # %middle.block
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
@@ -101,7 +106,6 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-AVX2-NEXT:    movq 24(%rsi), %rcx
 ; CHECK-AVX2-NEXT:    movq 32(%rsi), %rdx
 ; CHECK-AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [0,1]
-; CHECK-AVX2-NEXT:    xorl %esi, %esi
 ; CHECK-AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [1,1]
 ; CHECK-AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [2,2]
 ; CHECK-AVX2-NEXT:    .p2align 4, 0x90
@@ -109,34 +113,40 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-AVX2-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-AVX2-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; CHECK-AVX2-NEXT:    movq $-1024, %rdi # imm = 0xFC00
+; CHECK-AVX2-NEXT:    movq $-1024, %rsi # imm = 0xFC00
 ; CHECK-AVX2-NEXT:    vmovdqa %xmm0, %xmm4
 ; CHECK-AVX2-NEXT:    .p2align 4, 0x90
 ; CHECK-AVX2-NEXT:  .LBB0_2: # %vector.body
 ; CHECK-AVX2-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-AVX2-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-AVX2-NEXT:    cmpq 1024(%rdx,%rdi), %rsi
-; CHECK-AVX2-NEXT:    movq %rcx, %r8
-; CHECK-AVX2-NEXT:    sbbq 1032(%rdx,%rdi), %r8
+; CHECK-AVX2-NEXT:    vmovdqu 1024(%rdx,%rsi), %xmm5
+; CHECK-AVX2-NEXT:    vmovdqu 1040(%rdx,%rsi), %xmm6
+; CHECK-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm7 = xmm5[0],xmm6[0]
+; CHECK-AVX2-NEXT:    vpunpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
+; CHECK-AVX2-NEXT:    vmovq %xmm5, %rdi
+; CHECK-AVX2-NEXT:    vpextrq $1, %xmm5, %r8
+; CHECK-AVX2-NEXT:    vmovq %xmm7, %r9
+; CHECK-AVX2-NEXT:    vpextrq $1, %xmm7, %r10
+; CHECK-AVX2-NEXT:    negq %r10
+; CHECK-AVX2-NEXT:    movq %rcx, %r10
+; CHECK-AVX2-NEXT:    sbbq %r8, %r10
 ; CHECK-AVX2-NEXT:    setge %r8b
 ; CHECK-AVX2-NEXT:    movzbl %r8b, %r8d
-; CHECK-AVX2-NEXT:    andl $1, %r8d
 ; CHECK-AVX2-NEXT:    negq %r8
 ; CHECK-AVX2-NEXT:    vmovq %r8, %xmm5
-; CHECK-AVX2-NEXT:    cmpq 1040(%rdx,%rdi), %rsi
+; CHECK-AVX2-NEXT:    negq %r9
 ; CHECK-AVX2-NEXT:    movq %rcx, %r8
-; CHECK-AVX2-NEXT:    sbbq 1048(%rdx,%rdi), %r8
-; CHECK-AVX2-NEXT:    setge %r8b
-; CHECK-AVX2-NEXT:    movzbl %r8b, %r8d
-; CHECK-AVX2-NEXT:    andl $1, %r8d
-; CHECK-AVX2-NEXT:    negq %r8
-; CHECK-AVX2-NEXT:    vmovq %r8, %xmm6
-; CHECK-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
+; CHECK-AVX2-NEXT:    sbbq %rdi, %r8
+; CHECK-AVX2-NEXT:    setge %dil
+; CHECK-AVX2-NEXT:    movzbl %dil, %edi
+; CHECK-AVX2-NEXT:    negq %rdi
+; CHECK-AVX2-NEXT:    vmovq %rdi, %xmm6
+; CHECK-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm5 = xmm6[0],xmm5[0]
 ; CHECK-AVX2-NEXT:    vpsllvq %xmm4, %xmm1, %xmm6
 ; CHECK-AVX2-NEXT:    vpand %xmm6, %xmm5, %xmm5
 ; CHECK-AVX2-NEXT:    vpor %xmm3, %xmm5, %xmm3
 ; CHECK-AVX2-NEXT:    vpaddq %xmm2, %xmm4, %xmm4
-; CHECK-AVX2-NEXT:    addq $32, %rdi
+; CHECK-AVX2-NEXT:    addq $32, %rsi
 ; CHECK-AVX2-NEXT:    jne .LBB0_2
 ; CHECK-AVX2-NEXT:  # %bb.3: # %middle.block
 ; CHECK-AVX2-NEXT:    # in Loop: Header=BB0_1 Depth=1
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index fe17e415dbeb4b..30202701fdb8c8 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1541,9 +1541,8 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,1,1,3,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; SSE2-NEXT:    psllq $63, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; SSE2-NEXT:    movmskpd %xmm0, %eax
 ; SSE2-NEXT:    cmpl $3, %eax
 ; SSE2-NEXT:    sete %al
@@ -1556,8 +1555,7 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE42-NEXT:    movzwl (%rsi), %eax
 ; SSE42-NEXT:    movd %eax, %xmm1
 ; SSE42-NEXT:    pcmpeqb %xmm0, %xmm1
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    psllq $63, %xmm0
+; SSE42-NEXT:    pmovsxbq %xmm1, %xmm0
 ; SSE42-NEXT:    movmskpd %xmm0, %eax
 ; SSE42-NEXT:    cmpl $3, %eax
 ; SSE42-NEXT:    sete %al
@@ -1570,8 +1568,7 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; AVX1OR2-NEXT:    movzwl (%rsi), %eax
 ; AVX1OR2-NEXT:    vmovd %eax, %xmm1
 ; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX1OR2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1OR2-NEXT:    vtestpd %xmm1, %xmm0
 ; AVX1OR2-NEXT:    setb %al
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 4f91eb2cb0a5a1..2df39d69dbb751 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -1424,9 +1424,8 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,1,1,3,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; SSE2-NEXT:    psllq $63, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; SSE2-NEXT:    movmskpd %xmm0, %eax
 ; SSE2-NEXT:    testl %eax, %eax
 ; SSE2-NEXT:    setne %al
@@ -1439,8 +1438,7 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; SSE42-NEXT:    movzwl (%rsi), %eax
 ; SSE42-NEXT:    movd %eax, %xmm1
 ; SSE42-NEXT:    pcmpeqb %xmm0, %xmm1
-; SSE42-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; SSE42-NEXT:    psllq $63, %xmm0
+; SSE42-NEXT:    pmovsxbq %xmm1, %xmm0
 ; SSE42-NEXT:    movmskpd %xmm0, %eax
 ; SSE42-NEXT:    testl %eax, %eax
 ; SSE42-NEXT:    setne %al
@@ -1453,8 +1451,7 @@ define i1 @select_v2i8(ptr %s0, ptr %s1) {
 ; AVX1OR2-NEXT:    movzwl (%rsi), %eax
 ; AVX1OR2-NEXT:    vmovd %eax, %xmm1
 ; AVX1OR2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1OR2-NEXT:    vpsllq $63, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX1OR2-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq