[llvm] [X86]: Rewrite demorgan rule for ANDN (PR #163789)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Wed Oct 22 12:07:18 PDT 2025
    
    
  
https://github.com/kper updated https://github.com/llvm/llvm-project/pull/163789
>From ee35653e45cdfec53652118e9c93f0f12e597fbb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 13:53:37 +0000
Subject: [PATCH 01/20] [X86]: Reassoc demorgan rule for ANDN
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 28 ++++++
 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 98 +++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e2632d114ce0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,6 +51651,31 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
   return AndN;
 }
 
+// fold (not (or A, B)) -> andn(A, not(B)) if BMI
+static SDValue
+combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
+                                      SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = Xor->getValueType(0);
+  // Make sure this node is a candidate for BMI instructions.
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue A;
+  SDValue B;
+  APInt Cst;
+  if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+        Cst.isAllOnes()))
+    return SDValue();
+
+  auto Opcode =
+      Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
+  auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
+  return SDValue(AndN, 0);
+}
+
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
@@ -55150,6 +55175,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
+    return R;
+
   return combineFneg(N, DAG, DCI, Subtarget);
 }
 
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
new file mode 100644
index 0000000000000..ea81d08cd2e6d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+
+define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andnl %ecx, %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    notl %edi
+; X64-NEXT:    andnl %edi, %esi, %eax
+; X64-NEXT:    retq
+  %temp = or i32 %b, %a
+  %res = xor i32 %temp, -1
+  ret i32 %res
+}
+
+define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andnl %eax, %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    orl %esi, %edi
+; X64-NEXT:    notl %edx
+; X64-NEXT:    andnl %edx, %edi, %eax
+; X64-NEXT:    retq
+  %and.demorgan = or i32 %b, %a
+  %and3.demorgan = or i32 %and.demorgan, %c
+  %and3 = xor i32 %and3.demorgan, -1
+  ret i32 %and3
+}
+
+define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andnl %edx, %eax, %eax
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andnl %esi, %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    notq %rdi
+; X64-NEXT:    andnq %rdi, %rsi, %rax
+; X64-NEXT:    retq
+  %temp = or i64 %b, %a
+  %res = xor i64 %temp, -1
+  ret i64 %res
+}
+
+define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andnl %eax, %edx, %eax
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andnl %ecx, %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    notq %rdx
+; X64-NEXT:    andnq %rdx, %rdi, %rax
+; X64-NEXT:    retq
+  %and.demorgan = or i64 %b, %a
+  %and3.demorgan = or i64 %and.demorgan, %c
+  %and3 = xor i64 %and3.demorgan, -1
+  ret i64 %and3
+}
>From deda3383ab0015a23d521f37d44a5714def2346c Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:24:40 +0000
Subject: [PATCH 02/20] [X86]: Removed obsolete code
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 28 -------------------------
 1 file changed, 28 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e2632d114ce0b..a0b64ff370b10 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,31 +51651,6 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
   return AndN;
 }
 
-// fold (not (or A, B)) -> andn(A, not(B)) if BMI
-static SDValue
-combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
-                                      SelectionDAG &DAG,
-                                      const X86Subtarget &Subtarget) {
-  using namespace llvm::SDPatternMatch;
-
-  EVT VT = Xor->getValueType(0);
-  // Make sure this node is a candidate for BMI instructions.
-  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();
-
-  SDValue A;
-  SDValue B;
-  APInt Cst;
-  if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
-        Cst.isAllOnes()))
-    return SDValue();
-
-  auto Opcode =
-      Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
-  auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
-  return SDValue(AndN, 0);
-}
-
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
@@ -55175,9 +55150,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
-  if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
-    return R;
-
   return combineFneg(N, DAG, DCI, Subtarget);
 }
 
>From cdb57ef348cbd3ac3bd6b361fd6a3b4ddb9ff8fb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:25:21 +0000
Subject: [PATCH 03/20] [DAG]: Reassoc demorgan rule for ANDN
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..0629b75989233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,6 +10197,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     }
   }
 
+  // fold (not (or A, B)) -> and(not(A), not(B))
+  if (TLI.hasAndNot(SDValue(N, 0))) {
+    // If we have AndNot then it is profitable to apply demorgan to make use
+    // of the machine instruction.
+    SDValue A;
+    SDValue B;
+    APInt Cst;
+    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+        Cst.isAllOnes()) {
+      return DAG.getNode(
+          ISD::AND, DL, VT,
+          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
+          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+    }
+  }
+
   return SDValue();
 }
 
>From 9e4103d5d49bbc92b1fdfad30e5e52f51f9c70e2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:35:14 +0000
Subject: [PATCH 04/20] [DAG]: Fixed type
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0629b75989233..5b77dc423b66b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10206,10 +10206,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     APInt Cst;
     if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
         Cst.isAllOnes()) {
+      auto Ty = N->getValueType(0);
       return DAG.getNode(
           ISD::AND, DL, VT,
-          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
-          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)),
+          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)));
     }
   }
 
>From cf6ee582057a42e9ec8f5b81355c2bee8a8067cb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:45:46 +0000
Subject: [PATCH 05/20] [X86]: Updated tests
---
 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 210 ++++++++++++------
 1 file changed, 143 insertions(+), 67 deletions(-)
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
index ea81d08cd2e6d..7f3a376b24b2a 100644
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -1,42 +1,75 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
 
 define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
-; X86-LABEL: reassoc_demorgan_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    andnl %ecx, %eax, %eax
-; X86-NEXT:    retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_i32:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    notl %ecx
+; X86-WITH-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT:    retl
 ;
-; X64-LABEL: reassoc_demorgan_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    notl %edi
-; X64-NEXT:    andnl %edi, %esi, %eax
-; X64-NEXT:    retq
+; X64-WITH-BMI-LABEL: reassoc_demorgan_i32:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notl %edi
+; X64-WITH-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT:    notl %eax
+; X64-WITHOUT-BMI-NEXT:    retq
   %temp = or i32 %b, %a
   %res = xor i32 %temp, -1
   ret i32 %res
 }
 
 define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
-; X86-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    notl %eax
-; X86-NEXT:    andnl %eax, %ecx, %eax
-; X86-NEXT:    retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    notl %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %ecx
+; X86-WITH-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notl %edi
+; X64-WITH-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT:    andnl %eax, %edx, %eax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    retl
 ;
-; X64-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    orl %esi, %edi
-; X64-NEXT:    notl %edx
-; X64-NEXT:    andnl %edx, %edi, %eax
-; X64-NEXT:    retq
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %edx, %eax
+; X64-WITHOUT-BMI-NEXT:    notl %eax
+; X64-WITHOUT-BMI-NEXT:    retq
   %and.demorgan = or i32 %b, %a
   %and3.demorgan = or i32 %and.demorgan, %c
   %and3 = xor i32 %and3.demorgan, -1
@@ -44,53 +77,96 @@ define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwin
 }
 
 define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
-; X86-LABEL: reassoc_demorgan_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    notl %edx
-; X86-NEXT:    andnl %edx, %eax, %eax
-; X86-NEXT:    notl %esi
-; X86-NEXT:    andnl %esi, %ecx, %edx
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_i64:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    pushl %esi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT:    notl %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT:    notl %esi
+; X86-WITH-BMI-NEXT:    andnl %esi, %ecx, %edx
+; X86-WITH-BMI-NEXT:    popl %esi
+; X86-WITH-BMI-NEXT:    retl
 ;
-; X64-LABEL: reassoc_demorgan_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    notq %rdi
-; X64-NEXT:    andnq %rdi, %rsi, %rax
-; X64-NEXT:    retq
+; X64-WITH-BMI-LABEL: reassoc_demorgan_i64:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notq %rdi
+; X64-WITH-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    notl %edx
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT:    notq %rax
+; X64-WITHOUT-BMI-NEXT:    retq
   %temp = or i64 %b, %a
   %res = xor i64 %temp, -1
   ret i64 %res
 }
 
 define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
-; X86-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    notl %eax
-; X86-NEXT:    andnl %eax, %edx, %eax
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    andnl %ecx, %esi, %edx
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    pushl %ebx
+; X86-WITH-BMI-NEXT:    pushl %edi
+; X86-WITH-BMI-NEXT:    pushl %esi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-WITH-BMI-NEXT:    notl %edi
+; X86-WITH-BMI-NEXT:    andnl %edi, %edx, %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT:    notl %ebx
+; X86-WITH-BMI-NEXT:    andnl %ebx, %esi, %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %edx
+; X86-WITH-BMI-NEXT:    popl %esi
+; X86-WITH-BMI-NEXT:    popl %edi
+; X86-WITH-BMI-NEXT:    popl %ebx
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notq %rdi
+; X64-WITH-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT:    andnq %rax, %rdx, %rax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    notl %edx
+; X86-WITHOUT-BMI-NEXT:    retl
 ;
-; X64-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    orq %rsi, %rdi
-; X64-NEXT:    notq %rdx
-; X64-NEXT:    andnq %rdx, %rdi, %rax
-; X64-NEXT:    retq
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rdx, %rax
+; X64-WITHOUT-BMI-NEXT:    notq %rax
+; X64-WITHOUT-BMI-NEXT:    retq
   %and.demorgan = or i64 %b, %a
   %and3.demorgan = or i64 %and.demorgan, %c
   %and3 = xor i64 %and3.demorgan, -1
>From cef0067d84a41aad176352f02d9585dd71355a71 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 20:16:46 +0000
Subject: [PATCH 06/20] [DAG]: Updated tests
---
 llvm/test/CodeGen/X86/andnot-patterns.ll | 184 ++++++++++++++---------
 1 file changed, 116 insertions(+), 68 deletions(-)
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index fc573fbd4fc99..0701d7046fc35 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -761,6 +761,7 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ;
 ; X86-BMI-LABEL: andnot_bitreverse_i64:
 ; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
 ; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI-NEXT:    bswapl %eax
@@ -774,13 +775,16 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %eax
 ; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%eax,%edx,4), %eax
-; X86-BMI-NEXT:    movl %eax, %edx
-; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%edx,4), %esi
+; X86-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
+; X86-BMI-NEXT:    addl %esi, %esi
+; X86-BMI-NEXT:    shll $2, %edx
+; X86-BMI-NEXT:    notl %edx
+; X86-BMI-NEXT:    andnl %edx, %eax, %eax
 ; X86-BMI-NEXT:    shrl %eax
-; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-BMI-NEXT:    leal (%eax,%edx,2), %eax
-; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    orl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-BMI-NEXT:    andnl %eax, %esi, %eax
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI-NEXT:    bswapl %ecx
 ; X86-BMI-NEXT:    movl %ecx, %edx
 ; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
@@ -792,13 +796,17 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %ecx
 ; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%ecx,%edx,4), %ecx
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%ecx,%edx,4), %esi
+; X86-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
+; X86-BMI-NEXT:    addl %esi, %esi
+; X86-BMI-NEXT:    shll $2, %edx
+; X86-BMI-NEXT:    notl %edx
+; X86-BMI-NEXT:    andnl %edx, %ecx, %ecx
 ; X86-BMI-NEXT:    shrl %ecx
-; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-BMI-NEXT:    leal (%ecx,%edx,2), %ecx
-; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT:    orl $-1431655766, %ecx # imm = 0xAAAAAAAA
+; X86-BMI-NEXT:    andnl %ecx, %esi, %edx
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    popl %esi
 ; X86-BMI-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i64:
@@ -837,19 +845,23 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X64-BMI-NEXT:    andq %rcx, %rsi
 ; X64-BMI-NEXT:    shlq $4, %rsi
 ; X64-BMI-NEXT:    orq %rax, %rsi
-; X64-BMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-BMI-NEXT:    movq %rsi, %rcx
-; X64-BMI-NEXT:    andq %rax, %rcx
-; X64-BMI-NEXT:    shrq $2, %rsi
-; X64-BMI-NEXT:    andq %rax, %rsi
-; X64-BMI-NEXT:    leaq (%rsi,%rcx,4), %rax
-; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-BMI-NEXT:    movq %rax, %rdx
-; X64-BMI-NEXT:    andq %rcx, %rdx
-; X64-BMI-NEXT:    shrq %rax
+; X64-BMI-NEXT:    movq %rsi, %rax
+; X64-BMI-NEXT:    shrq $2, %rax
+; X64-BMI-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
 ; X64-BMI-NEXT:    andq %rcx, %rax
-; X64-BMI-NEXT:    leaq (%rax,%rdx,2), %rax
-; X64-BMI-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI-NEXT:    andq %rcx, %rsi
+; X64-BMI-NEXT:    leaq (,%rsi,4), %rcx
+; X64-BMI-NEXT:    notq %rcx
+; X64-BMI-NEXT:    andnq %rcx, %rax, %rcx
+; X64-BMI-NEXT:    shrq %rcx
+; X64-BMI-NEXT:    movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
+; X64-BMI-NEXT:    orq %rcx, %rdx
+; X64-BMI-NEXT:    leaq (%rax,%rsi,4), %rax
+; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT:    andq %rax, %rcx
+; X64-BMI-NEXT:    addq %rcx, %rcx
+; X64-BMI-NEXT:    andnq %rdx, %rcx, %rax
+; X64-BMI-NEXT:    andq %rdi, %rax
 ; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
@@ -896,13 +908,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %eax
 ; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-BMI-NEXT:    movl %eax, %ecx
-; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%ecx,4), %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    addl %edx, %edx
+; X86-BMI-NEXT:    shll $2, %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andnl %ecx, %eax, %eax
 ; X86-BMI-NEXT:    shrl %eax
-; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-BMI-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    orl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-BMI-NEXT:    andnl %eax, %edx, %eax
+; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i32:
@@ -940,16 +955,19 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X64-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
 ; X64-BMI-NEXT:    orl %eax, %esi
 ; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-BMI-NEXT:    shrl $2, %esi
 ; X64-BMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-BMI-NEXT:    movl %eax, %ecx
-; X64-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    leal (,%rsi,4), %ecx
+; X64-BMI-NEXT:    notl %ecx
+; X64-BMI-NEXT:    andnl %ecx, %eax, %ecx
+; X64-BMI-NEXT:    shrl %ecx
+; X64-BMI-NEXT:    orl $-1431655766, %ecx # imm = 0xAAAAAAAA
+; X64-BMI-NEXT:    leal (%rax,%rsi,4), %eax
 ; X64-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    addl %eax, %eax
+; X64-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X64-BMI-NEXT:    andl %edi, %eax
 ; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
@@ -958,30 +976,57 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 }
 
 define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i16:
-; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $13107, %eax # imm = 0x3333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $21845, %eax # imm = 0x5555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
-; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bitreverse_i16:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    rolw $8, %ax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X86-NOBMI-NEXT:    shll $4, %ecx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X86-NOBMI-NEXT:    orl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andw {{[0-9]+}}(%esp), %ax
+; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: andnot_bitreverse_i16:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    rolw $8, %ax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X86-BMI-NEXT:    shll $4, %ecx
+; X86-BMI-NEXT:    shrl $4, %eax
+; X86-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X86-BMI-NEXT:    orl %ecx, %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X86-BMI-NEXT:    shrl $2, %eax
+; X86-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X86-BMI-NEXT:    leal (%eax,%ecx,4), %edx
+; X86-BMI-NEXT:    andl $21845, %edx # imm = 0x5555
+; X86-BMI-NEXT:    addl %edx, %edx
+; X86-BMI-NEXT:    shll $2, %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X86-BMI-NEXT:    shrl %eax
+; X86-BMI-NEXT:    orl $43690, %eax # imm = 0xAAAA
+; X86-BMI-NEXT:    andnl %eax, %edx, %eax
+; X86-BMI-NEXT:    andw {{[0-9]+}}(%esp), %ax
+; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i16:
 ; X64-NOBMI:       # %bb.0:
@@ -1019,16 +1064,19 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
 ; X64-BMI-NEXT:    orl %eax, %esi
 ; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-BMI-NEXT:    shrl $2, %esi
 ; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-BMI-NEXT:    movl %eax, %ecx
-; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
-; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    leal (,%rsi,4), %ecx
+; X64-BMI-NEXT:    notl %ecx
+; X64-BMI-NEXT:    andnl %ecx, %eax, %ecx
+; X64-BMI-NEXT:    shrl %ecx
+; X64-BMI-NEXT:    orl $-21846, %ecx # imm = 0xAAAA
+; X64-BMI-NEXT:    leal (%rax,%rsi,4), %eax
 ; X64-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
-; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    addl %eax, %eax
+; X64-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X64-BMI-NEXT:    andl %edi, %eax
 ; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
>From e31e5ab5bc605bd83d9090b207fabf7094294679 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Fri, 17 Oct 2025 05:08:17 +0000
Subject: [PATCH 07/20] [DAG]: Updated tests
---
 .../AArch64/neon-compare-instructions.ll      | 264 ++++++++++++------
 llvm/test/CodeGen/PowerPC/vsx.ll              |  62 ++--
 2 files changed, 219 insertions(+), 107 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 11b3b62ec1c8d..60c6d84679451 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2217,13 +2217,21 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmuno2xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    mvn v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno2xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
+; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno2xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2231,13 +2239,21 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmuno4xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2245,13 +2261,21 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmuno2xdouble:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno2xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno2xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -2259,13 +2283,21 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmueq2xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    mvn v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueq2xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
+; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueq2xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2273,13 +2305,21 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmueq4xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueq4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueq4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2287,13 +2327,21 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmueq2xdouble:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueq2xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueq2xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -2792,13 +2840,21 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
-; CHECK-LABEL: fcmueqz2xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v1.2s, v0.2s, #0.0
-; CHECK-NEXT:    fcmlt v0.2s, v0.2s, #0.0
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    mvn v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueqz2xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmlt v1.2s, v0.2s, #0.0
+; CHECK-SD-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; CHECK-SD-NEXT:    mvn v1.8b, v1.8b
+; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueqz2xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v1.2s, v0.2s, #0.0
+; CHECK-GI-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2806,13 +2862,21 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
-; CHECK-LABEL: fcmueqz4xfloat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v1.4s, v0.4s, #0.0
-; CHECK-NEXT:    fcmlt v0.4s, v0.4s, #0.0
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueqz4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmlt v1.4s, v0.4s, #0.0
+; CHECK-SD-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueqz4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v1.4s, v0.4s, #0.0
+; CHECK-GI-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2820,13 +2884,21 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
-; CHECK-LABEL: fcmueqz2xdouble:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmgt v1.2d, v0.2d, #0.0
-; CHECK-NEXT:    fcmlt v0.2d, v0.2d, #0.0
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmueqz2xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmlt v1.2d, v0.2d, #0.0
+; CHECK-SD-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmueqz2xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmgt v1.2d, v0.2d, #0.0
+; CHECK-GI-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -3286,39 +3358,63 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 
 
 define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmuno2xfloat_fast:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    mvn v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno2xfloat_fast:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
+; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno2xfloat_fast:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmuno4xfloat_fast:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno4xfloat_fast:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno4xfloat_fast:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp fast uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmuno2xdouble_fast:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcmuno2xdouble_fast:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmuno2xdouble_fast:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 14b3d69f8c273..3cde26271d50e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -378,23 +378,27 @@ entry:
 define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor v2, v2, v3
+; CHECK-NEXT:    xxlnor vs0, v2, v2
+; CHECK-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test14:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor v2, v2, v3
+; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
+; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test14:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor vs0, v2, v3
-; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
+; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test14:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor v2, v2, v3
+; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
+; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <4 x i32> %a, %b
@@ -408,23 +412,27 @@ entry:
 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor v2, v2, v3
+; CHECK-NEXT:    xxlnor vs0, v2, v2
+; CHECK-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test15:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor v2, v2, v3
+; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
+; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test15:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor v4, v2, v3
-; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
+; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test15:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor v2, v2, v3
+; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
+; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <8 x i16> %a, %b
@@ -438,23 +446,27 @@ entry:
 define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor v2, v2, v3
+; CHECK-NEXT:    xxlnor vs0, v2, v2
+; CHECK-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test16:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor v2, v2, v3
+; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
+; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test16:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor v4, v2, v3
-; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
+; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test16:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor v2, v2, v3
+; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
+; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <16 x i8> %a, %b
@@ -624,34 +636,38 @@ entry:
 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
 ; CHECK-LABEL: test22:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvcmpgtsp vs0, v5, v4
 ; CHECK-NEXT:    xvcmpgtsp vs1, v4, v5
-; CHECK-NEXT:    xxlor vs0, vs1, vs0
-; CHECK-NEXT:    xxsel v2, v2, v3, vs0
+; CHECK-NEXT:    xvcmpgtsp vs0, v5, v4
+; CHECK-NEXT:    xxlnor vs1, vs1, vs1
+; CHECK-NEXT:    xxlandc vs0, vs1, vs0
+; CHECK-NEXT:    xxsel v2, v3, v2, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test22:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xvcmpgtsp vs0, v5, v4
 ; CHECK-REG-NEXT:    xvcmpgtsp vs1, v4, v5
-; CHECK-REG-NEXT:    xxlor vs0, vs1, vs0
-; CHECK-REG-NEXT:    xxsel v2, v2, v3, vs0
+; CHECK-REG-NEXT:    xvcmpgtsp vs0, v5, v4
+; CHECK-REG-NEXT:    xxlnor vs1, vs1, vs1
+; CHECK-REG-NEXT:    xxlandc vs0, vs1, vs0
+; CHECK-REG-NEXT:    xxsel v2, v3, v2, vs0
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test22:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xvcmpgtsp vs1, v5, v4
 ; CHECK-FISL-NEXT:    xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT:    xxlor vs0, vs0, vs1
-; CHECK-FISL-NEXT:    xxsel v2, v2, v3, vs0
+; CHECK-FISL-NEXT:    xxlnor vs0, vs0, vs0
+; CHECK-FISL-NEXT:    xxlandc vs0, vs0, vs1
+; CHECK-FISL-NEXT:    xxsel v2, v3, v2, vs0
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test22:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xvcmpgtsp vs0, v5, v4
 ; CHECK-LE-NEXT:    xvcmpgtsp vs1, v4, v5
-; CHECK-LE-NEXT:    xxlor vs0, vs1, vs0
-; CHECK-LE-NEXT:    xxsel v2, v2, v3, vs0
+; CHECK-LE-NEXT:    xvcmpgtsp vs0, v5, v4
+; CHECK-LE-NEXT:    xxlnor vs1, vs1, vs1
+; CHECK-LE-NEXT:    xxlandc vs0, vs1, vs0
+; CHECK-LE-NEXT:    xxsel v2, v3, v2, vs0
 ; CHECK-LE-NEXT:    blr
 entry:
   %m = fcmp ueq <4 x float> %c, %d
>From cf9da8380eacff36dd4173c2afd7e39e36c1e50a Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:22:07 +0000
Subject: [PATCH 08/20] Revert "[DAG]: Updated tests"
This reverts commit e31e5ab5bc605bd83d9090b207fabf7094294679.
---
 .../AArch64/neon-compare-instructions.ll      | 264 ++++++------------
 llvm/test/CodeGen/PowerPC/vsx.ll              |  62 ++--
 2 files changed, 107 insertions(+), 219 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 60c6d84679451..11b3b62ec1c8d 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2217,21 +2217,13 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmuno2xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT:    fcmge v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
-; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno2xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno2xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2239,21 +2231,13 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmuno4xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    fcmge v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno4xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2261,21 +2245,13 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
 
 ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
 define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmuno2xdouble:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT:    fcmge v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno2xdouble:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -2283,21 +2259,13 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmueq2xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
-; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueq2xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueq2xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2305,21 +2273,13 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmueq4xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueq4xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueq4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2327,21 +2287,13 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
 
 ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
 define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmueq2xdouble:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueq2xdouble:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueq2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -2840,21 +2792,13 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
-; CHECK-SD-LABEL: fcmueqz2xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmlt v1.2s, v0.2s, #0.0
-; CHECK-SD-NEXT:    fcmgt v0.2s, v0.2s, #0.0
-; CHECK-SD-NEXT:    mvn v1.8b, v1.8b
-; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueqz2xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v1.2s, v0.2s, #0.0
-; CHECK-GI-NEXT:    fcmlt v0.2s, v0.2s, #0.0
-; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueqz2xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v1.2s, v0.2s, #0.0
+; CHECK-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2862,21 +2806,13 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
-; CHECK-SD-LABEL: fcmueqz4xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmlt v1.4s, v0.4s, #0.0
-; CHECK-SD-NEXT:    fcmgt v0.4s, v0.4s, #0.0
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueqz4xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v1.4s, v0.4s, #0.0
-; CHECK-GI-NEXT:    fcmlt v0.4s, v0.4s, #0.0
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueqz4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v1.4s, v0.4s, #0.0
+; CHECK-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2884,21 +2820,13 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
 
 ; UEQ with zero = !ONE = !(OLT |OGT)
 define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
-; CHECK-SD-LABEL: fcmueqz2xdouble:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmlt v1.2d, v0.2d, #0.0
-; CHECK-SD-NEXT:    fcmgt v0.2d, v0.2d, #0.0
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmueqz2xdouble:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmgt v1.2d, v0.2d, #0.0
-; CHECK-GI-NEXT:    fcmlt v0.2d, v0.2d, #0.0
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmueqz2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v1.2d, v0.2d, #0.0
+; CHECK-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -3358,63 +3286,39 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 
 
 define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmuno2xfloat_fast:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT:    fcmge v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT:    mvn v1.8b, v2.8b
-; CHECK-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno2xfloat_fast:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmuno4xfloat_fast:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    fcmge v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno4xfloat_fast:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp fast uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmuno2xdouble_fast:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT:    fcmge v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fcmuno2xdouble_fast:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fcmge v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fcmuno2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 3cde26271d50e..14b3d69f8c273 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -378,27 +378,23 @@ entry:
 define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v2, v2
-; CHECK-NEXT:    xxlandc v2, vs0, v3
+; CHECK-NEXT:    xxlnor v2, v2, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test14:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
-; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
+; CHECK-REG-NEXT:    xxlnor v2, v2, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test14:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor vs0, v2, v3
-; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test14:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
-; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
+; CHECK-LE-NEXT:    xxlnor v2, v2, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <4 x i32> %a, %b
@@ -412,27 +408,23 @@ entry:
 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v2, v2
-; CHECK-NEXT:    xxlandc v2, vs0, v3
+; CHECK-NEXT:    xxlnor v2, v2, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test15:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
-; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
+; CHECK-REG-NEXT:    xxlnor v2, v2, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test15:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor v4, v2, v3
-; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test15:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
-; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
+; CHECK-LE-NEXT:    xxlnor v2, v2, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <8 x i16> %a, %b
@@ -446,27 +438,23 @@ entry:
 define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v2, v2
-; CHECK-NEXT:    xxlandc v2, vs0, v3
+; CHECK-NEXT:    xxlnor v2, v2, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test16:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xxlnor vs0, v2, v2
-; CHECK-REG-NEXT:    xxlandc v2, vs0, v3
+; CHECK-REG-NEXT:    xxlnor v2, v2, v3
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test16:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xxlor v4, v2, v3
-; CHECK-FISL-NEXT:    xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT:    xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT:    xxlnor v2, v2, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test16:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxlnor vs0, v2, v2
-; CHECK-LE-NEXT:    xxlandc v2, vs0, v3
+; CHECK-LE-NEXT:    xxlnor v2, v2, v3
 ; CHECK-LE-NEXT:    blr
 entry:
   %v = or <16 x i8> %a, %b
@@ -636,38 +624,34 @@ entry:
 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
 ; CHECK-LABEL: test22:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvcmpgtsp vs1, v4, v5
 ; CHECK-NEXT:    xvcmpgtsp vs0, v5, v4
-; CHECK-NEXT:    xxlnor vs1, vs1, vs1
-; CHECK-NEXT:    xxlandc vs0, vs1, vs0
-; CHECK-NEXT:    xxsel v2, v3, v2, vs0
+; CHECK-NEXT:    xvcmpgtsp vs1, v4, v5
+; CHECK-NEXT:    xxlor vs0, vs1, vs0
+; CHECK-NEXT:    xxsel v2, v2, v3, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test22:
 ; CHECK-REG:       # %bb.0: # %entry
-; CHECK-REG-NEXT:    xvcmpgtsp vs1, v4, v5
 ; CHECK-REG-NEXT:    xvcmpgtsp vs0, v5, v4
-; CHECK-REG-NEXT:    xxlnor vs1, vs1, vs1
-; CHECK-REG-NEXT:    xxlandc vs0, vs1, vs0
-; CHECK-REG-NEXT:    xxsel v2, v3, v2, vs0
+; CHECK-REG-NEXT:    xvcmpgtsp vs1, v4, v5
+; CHECK-REG-NEXT:    xxlor vs0, vs1, vs0
+; CHECK-REG-NEXT:    xxsel v2, v2, v3, vs0
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test22:
 ; CHECK-FISL:       # %bb.0: # %entry
 ; CHECK-FISL-NEXT:    xvcmpgtsp vs1, v5, v4
 ; CHECK-FISL-NEXT:    xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT:    xxlnor vs0, vs0, vs0
-; CHECK-FISL-NEXT:    xxlandc vs0, vs0, vs1
-; CHECK-FISL-NEXT:    xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT:    xxlor vs0, vs0, vs1
+; CHECK-FISL-NEXT:    xxsel v2, v2, v3, vs0
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test22:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xvcmpgtsp vs1, v4, v5
 ; CHECK-LE-NEXT:    xvcmpgtsp vs0, v5, v4
-; CHECK-LE-NEXT:    xxlnor vs1, vs1, vs1
-; CHECK-LE-NEXT:    xxlandc vs0, vs1, vs0
-; CHECK-LE-NEXT:    xxsel v2, v3, v2, vs0
+; CHECK-LE-NEXT:    xvcmpgtsp vs1, v4, v5
+; CHECK-LE-NEXT:    xxlor vs0, vs1, vs0
+; CHECK-LE-NEXT:    xxsel v2, v2, v3, vs0
 ; CHECK-LE-NEXT:    blr
 entry:
   %m = fcmp ueq <4 x float> %c, %d
>From 3a90a6990a0970b81fd30e67400074515bc2dbb2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:22:21 +0000
Subject: [PATCH 09/20] Revert "[DAG]: Updated tests"
This reverts commit cef0067d84a41aad176352f02d9585dd71355a71.
---
 llvm/test/CodeGen/X86/andnot-patterns.ll | 184 +++++++++--------------
 1 file changed, 68 insertions(+), 116 deletions(-)
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index 0701d7046fc35..fc573fbd4fc99 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -761,7 +761,6 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ;
 ; X86-BMI-LABEL: andnot_bitreverse_i64:
 ; X86-BMI:       # %bb.0:
-; X86-BMI-NEXT:    pushl %esi
 ; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI-NEXT:    bswapl %eax
@@ -775,16 +774,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %eax
 ; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%eax,%edx,4), %esi
-; X86-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; X86-BMI-NEXT:    addl %esi, %esi
-; X86-BMI-NEXT:    shll $2, %edx
-; X86-BMI-NEXT:    notl %edx
-; X86-BMI-NEXT:    andnl %edx, %eax, %eax
+; X86-BMI-NEXT:    leal (%eax,%edx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
 ; X86-BMI-NEXT:    shrl %eax
-; X86-BMI-NEXT:    orl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-BMI-NEXT:    andnl %eax, %esi, %eax
-; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%edx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI-NEXT:    bswapl %ecx
 ; X86-BMI-NEXT:    movl %ecx, %edx
 ; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
@@ -796,17 +792,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %ecx
 ; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%ecx,%edx,4), %esi
-; X86-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; X86-BMI-NEXT:    addl %esi, %esi
-; X86-BMI-NEXT:    shll $2, %edx
-; X86-BMI-NEXT:    notl %edx
-; X86-BMI-NEXT:    andnl %edx, %ecx, %ecx
+; X86-BMI-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
 ; X86-BMI-NEXT:    shrl %ecx
-; X86-BMI-NEXT:    orl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X86-BMI-NEXT:    andnl %ecx, %esi, %edx
-; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
 ; X86-BMI-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i64:
@@ -845,23 +837,19 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X64-BMI-NEXT:    andq %rcx, %rsi
 ; X64-BMI-NEXT:    shlq $4, %rsi
 ; X64-BMI-NEXT:    orq %rax, %rsi
-; X64-BMI-NEXT:    movq %rsi, %rax
-; X64-BMI-NEXT:    shrq $2, %rax
-; X64-BMI-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-BMI-NEXT:    andq %rcx, %rax
-; X64-BMI-NEXT:    andq %rcx, %rsi
-; X64-BMI-NEXT:    leaq (,%rsi,4), %rcx
-; X64-BMI-NEXT:    notq %rcx
-; X64-BMI-NEXT:    andnq %rcx, %rax, %rcx
-; X64-BMI-NEXT:    shrq %rcx
-; X64-BMI-NEXT:    movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
-; X64-BMI-NEXT:    orq %rcx, %rdx
-; X64-BMI-NEXT:    leaq (%rax,%rsi,4), %rax
-; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BMI-NEXT:    movq %rsi, %rcx
 ; X64-BMI-NEXT:    andq %rax, %rcx
-; X64-BMI-NEXT:    addq %rcx, %rcx
-; X64-BMI-NEXT:    andnq %rdx, %rcx, %rax
-; X64-BMI-NEXT:    andq %rdi, %rax
+; X64-BMI-NEXT:    shrq $2, %rsi
+; X64-BMI-NEXT:    andq %rax, %rsi
+; X64-BMI-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT:    movq %rax, %rdx
+; X64-BMI-NEXT:    andq %rcx, %rdx
+; X64-BMI-NEXT:    shrq %rax
+; X64-BMI-NEXT:    andq %rcx, %rax
+; X64-BMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-BMI-NEXT:    andnq %rdi, %rax, %rax
 ; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
@@ -908,16 +896,13 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
 ; X86-BMI-NEXT:    shrl $2, %eax
 ; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT:    leal (%eax,%ecx,4), %edx
-; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; X86-BMI-NEXT:    addl %edx, %edx
-; X86-BMI-NEXT:    shll $2, %ecx
-; X86-BMI-NEXT:    notl %ecx
-; X86-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X86-BMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
 ; X86-BMI-NEXT:    shrl %eax
-; X86-BMI-NEXT:    orl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-BMI-NEXT:    andnl %eax, %edx, %eax
-; X86-BMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i32:
@@ -955,19 +940,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X64-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
 ; X64-BMI-NEXT:    orl %eax, %esi
 ; X64-BMI-NEXT:    movl %esi, %eax
-; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-BMI-NEXT:    shrl $2, %esi
 ; X64-BMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; X64-BMI-NEXT:    leal (,%rsi,4), %ecx
-; X64-BMI-NEXT:    notl %ecx
-; X64-BMI-NEXT:    andnl %ecx, %eax, %ecx
-; X64-BMI-NEXT:    shrl %ecx
-; X64-BMI-NEXT:    orl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X64-BMI-NEXT:    leal (%rax,%rsi,4), %eax
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-BMI-NEXT:    shrl %eax
 ; X64-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-BMI-NEXT:    addl %eax, %eax
-; X64-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X64-BMI-NEXT:    andl %edi, %eax
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
 ; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
@@ -976,57 +958,30 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 }
 
 define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
-; X86-NOBMI-LABEL: andnot_bitreverse_i16:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    rolw $8, %ax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; X86-NOBMI-NEXT:    shll $4, %ecx
-; X86-NOBMI-NEXT:    shrl $4, %eax
-; X86-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X86-NOBMI-NEXT:    orl %ecx, %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-NOBMI-NEXT:    shrl $2, %eax
-; X86-NOBMI-NEXT:    andl $13107, %eax # imm = 0x3333
-; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $21845, %ecx # imm = 0x5555
-; X86-NOBMI-NEXT:    shrl %eax
-; X86-NOBMI-NEXT:    andl $21845, %eax # imm = 0x5555
-; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NOBMI-NEXT:    notl %eax
-; X86-NOBMI-NEXT:    andw {{[0-9]+}}(%esp), %ax
-; X86-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NOBMI-NEXT:    retl
-;
-; X86-BMI-LABEL: andnot_bitreverse_i16:
-; X86-BMI:       # %bb.0:
-; X86-BMI-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    rolw $8, %ax
-; X86-BMI-NEXT:    movl %eax, %ecx
-; X86-BMI-NEXT:    andl $3855, %ecx # imm = 0xF0F
-; X86-BMI-NEXT:    shll $4, %ecx
-; X86-BMI-NEXT:    shrl $4, %eax
-; X86-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X86-BMI-NEXT:    orl %ecx, %eax
-; X86-BMI-NEXT:    movl %eax, %ecx
-; X86-BMI-NEXT:    andl $13107, %ecx # imm = 0x3333
-; X86-BMI-NEXT:    shrl $2, %eax
-; X86-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
-; X86-BMI-NEXT:    leal (%eax,%ecx,4), %edx
-; X86-BMI-NEXT:    andl $21845, %edx # imm = 0x5555
-; X86-BMI-NEXT:    addl %edx, %edx
-; X86-BMI-NEXT:    shll $2, %ecx
-; X86-BMI-NEXT:    notl %ecx
-; X86-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X86-BMI-NEXT:    shrl %eax
-; X86-BMI-NEXT:    orl $43690, %eax # imm = 0xAAAA
-; X86-BMI-NEXT:    andnl %eax, %edx, %eax
-; X86-BMI-NEXT:    andw {{[0-9]+}}(%esp), %ax
-; X86-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-BMI-NEXT:    retl
+; X86-LABEL: andnot_bitreverse_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $13107, %eax # imm = 0x3333
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    andl $21845, %eax # imm = 0x5555
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i16:
 ; X64-NOBMI:       # %bb.0:
@@ -1064,19 +1019,16 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
 ; X64-BMI-NEXT:    orl %eax, %esi
 ; X64-BMI-NEXT:    movl %esi, %eax
-; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-BMI-NEXT:    shrl $2, %esi
 ; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT:    leal (,%rsi,4), %ecx
-; X64-BMI-NEXT:    notl %ecx
-; X64-BMI-NEXT:    andnl %ecx, %eax, %ecx
-; X64-BMI-NEXT:    shrl %ecx
-; X64-BMI-NEXT:    orl $-21846, %ecx # imm = 0xAAAA
-; X64-BMI-NEXT:    leal (%rax,%rsi,4), %eax
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-BMI-NEXT:    shrl %eax
 ; X64-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
-; X64-BMI-NEXT:    addl %eax, %eax
-; X64-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X64-BMI-NEXT:    andl %edi, %eax
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
 ; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
>From 1245b6b33393488eab867affc2b8c59693c26ecf Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:53:43 +0000
Subject: [PATCH 10/20] [DAG]: Rewrite `~(a | b | c)` into `~a & ~b & ~c`
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5b77dc423b66b..fba8b62f5ca35 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,20 +10197,26 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     }
   }
 
-  // fold (not (or A, B)) -> and(not(A), not(B))
+  // fold (not (or A, or(B, C))) -> and(not(A), and(not(B), not(C)))
   if (TLI.hasAndNot(SDValue(N, 0))) {
     // If we have AndNot then it is profitable to apply demorgan to make use
     // of the machine instruction.
     SDValue A;
     SDValue B;
+    SDValue C;
     APInt Cst;
-    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) &&
         Cst.isAllOnes()) {
       auto Ty = N->getValueType(0);
+
+      auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
+      auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
+      auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
+
       return DAG.getNode(
           ISD::AND, DL, VT,
-          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)),
-          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)));
+          NegA,
+          DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
     }
   }
 
>From 746b101885509bddc2f6ad3460a09eaf61de99ae Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:54:42 +0000
Subject: [PATCH 11/20] [X86]: Created new test
---
 llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
diff --git a/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
new file mode 100644
index 0000000000000..a1ace1b6ca157
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
+
+define i32 @not_rewrite_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i32:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    notl %eax
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i32:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    movl %edi, %eax
+; X64-WITH-BMI-NEXT:    orl %esi, %eax
+; X64-WITH-BMI-NEXT:    notl %eax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT:    notl %eax
+; X64-WITHOUT-BMI-NEXT:    retq
+  %temp = or i32 %b, %a
+  %res = xor i32 %temp, -1
+  ret i32 %res
+}
+
+define i32 @rewrite_demorgan_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-WITH-BMI-LABEL: rewrite_demorgan_i32:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    notl %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %ecx
+; X86-WITH-BMI-NEXT:    andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: rewrite_demorgan_i32:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notl %edi
+; X64-WITH-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT:    andnl %eax, %edx, %eax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: rewrite_demorgan_i32:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i32:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT:    orl %edx, %eax
+; X64-WITHOUT-BMI-NEXT:    notl %eax
+; X64-WITHOUT-BMI-NEXT:    retq
+  %and.demorgan = or i32 %b, %a
+  %and3.demorgan = or i32 %and.demorgan, %c
+  %and3 = xor i32 %and3.demorgan, -1
+  ret i32 %and3
+}
+
+define i64 @not_rewrite_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i64:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    notl %eax
+; X86-WITH-BMI-NEXT:    notl %edx
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i64:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    movq %rdi, %rax
+; X64-WITH-BMI-NEXT:    orq %rsi, %rax
+; X64-WITH-BMI-NEXT:    notq %rax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    notl %edx
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT:    notq %rax
+; X64-WITHOUT-BMI-NEXT:    retq
+  %temp = or i64 %b, %a
+  %res = xor i64 %temp, -1
+  ret i64 %res
+}
+
+define i64 @rewrite_demorgan_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-WITH-BMI-LABEL: rewrite_demorgan_i64:
+; X86-WITH-BMI:       # %bb.0:
+; X86-WITH-BMI-NEXT:    pushl %ebx
+; X86-WITH-BMI-NEXT:    pushl %edi
+; X86-WITH-BMI-NEXT:    pushl %esi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-WITH-BMI-NEXT:    notl %edi
+; X86-WITH-BMI-NEXT:    andnl %edi, %edx, %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT:    notl %ebx
+; X86-WITH-BMI-NEXT:    andnl %ebx, %esi, %edx
+; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %edx
+; X86-WITH-BMI-NEXT:    popl %esi
+; X86-WITH-BMI-NEXT:    popl %edi
+; X86-WITH-BMI-NEXT:    popl %ebx
+; X86-WITH-BMI-NEXT:    retl
+;
+; X64-WITH-BMI-LABEL: rewrite_demorgan_i64:
+; X64-WITH-BMI:       # %bb.0:
+; X64-WITH-BMI-NEXT:    notq %rdi
+; X64-WITH-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT:    andnq %rax, %rdx, %rax
+; X64-WITH-BMI-NEXT:    retq
+;
+; X86-WITHOUT-BMI-LABEL: rewrite_demorgan_i64:
+; X86-WITHOUT-BMI:       # %bb.0:
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT:    notl %eax
+; X86-WITHOUT-BMI-NEXT:    notl %edx
+; X86-WITHOUT-BMI-NEXT:    retl
+;
+; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i64:
+; X64-WITHOUT-BMI:       # %bb.0:
+; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT:    orq %rdx, %rax
+; X64-WITHOUT-BMI-NEXT:    notq %rax
+; X64-WITHOUT-BMI-NEXT:    retq
+  %and.demorgan = or i64 %b, %a
+  %and3.demorgan = or i64 %and.demorgan, %c
+  %and3 = xor i64 %and3.demorgan, -1
+  ret i64 %and3
+}
>From f237020a2005d5b42cc32e0849eacd5ba806ff2f Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:55:33 +0000
Subject: [PATCH 12/20] [DAG]: Run fmt
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fba8b62f5ca35..fbc63d8eb6d40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10205,18 +10205,20 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     SDValue B;
     SDValue C;
     APInt Cst;
-    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) &&
+    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))),
+                          m_ConstInt(Cst))) &&
         Cst.isAllOnes()) {
       auto Ty = N->getValueType(0);
 
-      auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
-      auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
-      auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
+      auto NegA =
+          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
+      auto NegB =
+          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
+      auto NegC =
+          DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
 
-      return DAG.getNode(
-          ISD::AND, DL, VT,
-          NegA,
-          DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
+      return DAG.getNode(ISD::AND, DL, VT, NegA,
+                         DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
     }
   }
 
>From 956b849a31b714a235ad160558f1e3ff69f5f363 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:55:51 +0000
Subject: [PATCH 13/20] [AArch64]: Updated tests
---
 llvm/test/CodeGen/AArch64/bsl.ll              | 120 ++++++++-------
 .../CodeGen/AArch64/build-vector-dup-simd.ll  |  24 +--
 llvm/test/CodeGen/AArch64/ctlz.ll             | 139 +++++++++++-------
 llvm/test/CodeGen/AArch64/eon.ll              |  22 ++-
 .../CodeGen/AArch64/fp16-v4-instructions.ll   |  44 ++++--
 .../CodeGen/AArch64/fp16-v8-instructions.ll   |  50 +++++--
 llvm/test/CodeGen/AArch64/sve2-bsl.ll         |  36 +++--
 7 files changed, 258 insertions(+), 177 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
index df6b6f75b8935..fcf7393d2c801 100644
--- a/llvm/test/CodeGen/AArch64/bsl.ll
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -32,17 +32,19 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 ; NEON-LABEL: nbsl_v1i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    bic v1.8b, v1.8b, v2.8b
 ; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v1i64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    bic v1.8b, v1.8b, v2.8b
+; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; SVE2-NEXT:    ret
   %4 = and <1 x i64> %2, %0
   %5 = xor <1 x i64> %2, splat (i64 -1)
@@ -78,9 +80,8 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 ; NEON-LABEL: bsl2n_v1i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
-; NEON-NEXT:    orr v1.8b, v2.8b, v1.8b
-; NEON-NEXT:    orn v0.8b, v0.8b, v1.8b
+; NEON-NEXT:    mvn v1.8b, v1.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: bsl2n_v1i64:
@@ -118,17 +119,19 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NEON-LABEL: nbsl_v2i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    bic v1.16b, v1.16b, v2.16b
 ; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v2i64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    bic v1.16b, v1.16b, v2.16b
+; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; SVE2-NEXT:    ret
   %4 = and <2 x i64> %2, %0
   %5 = xor <2 x i64> %2, splat (i64 -1)
@@ -164,9 +167,8 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NEON-LABEL: bsl2n_v2i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
-; NEON-NEXT:    orr v1.16b, v2.16b, v1.16b
-; NEON-NEXT:    orn v0.16b, v0.16b, v1.16b
+; NEON-NEXT:    mvn v1.16b, v1.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: bsl2n_v2i64:
@@ -189,17 +191,18 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
 ; NEON-LABEL: nbsl_v8i8:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
-; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    and v3.8b, v2.8b, v1.8b
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    orn v1.8b, v3.8b, v1.8b
+; NEON-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    and v3.8b, v2.8b, v1.8b
+; SVE2-NEXT:    and v0.8b, v2.8b, v0.8b
+; SVE2-NEXT:    orn v1.8b, v3.8b, v1.8b
+; SVE2-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; SVE2-NEXT:    ret
   %4 = and <8 x i8> %2, %0
   %5 = xor <8 x i8> %2, splat (i8 -1)
@@ -212,17 +215,18 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
 define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
 ; NEON-LABEL: nbsl_v4i16:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
-; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    and v3.8b, v2.8b, v1.8b
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    orn v1.8b, v3.8b, v1.8b
+; NEON-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    and v3.8b, v2.8b, v1.8b
+; SVE2-NEXT:    and v0.8b, v2.8b, v0.8b
+; SVE2-NEXT:    orn v1.8b, v3.8b, v1.8b
+; SVE2-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; SVE2-NEXT:    ret
   %4 = and <4 x i16> %2, %0
   %5 = xor <4 x i16> %2, splat (i16 -1)
@@ -235,17 +239,19 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
 define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 ; NEON-LABEL: nbsl_v2i32:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    bic v1.8b, v1.8b, v2.8b
 ; NEON-NEXT:    mvn v0.8b, v0.8b
+; NEON-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v2i32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT:    bic v1.8b, v1.8b, v2.8b
+; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; SVE2-NEXT:    ret
   %4 = and <2 x i32> %2, %0
   %5 = xor <2 x i32> %2, splat (i32 -1)
@@ -258,17 +264,18 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; NEON-LABEL: nbsl_v16i8:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
-; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    and v3.16b, v2.16b, v1.16b
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    orn v1.16b, v3.16b, v1.16b
+; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    and v3.16b, v2.16b, v1.16b
+; SVE2-NEXT:    and v0.16b, v2.16b, v0.16b
+; SVE2-NEXT:    orn v1.16b, v3.16b, v1.16b
+; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; SVE2-NEXT:    ret
   %4 = and <16 x i8> %2, %0
   %5 = xor <16 x i8> %2, splat (i8 -1)
@@ -281,17 +288,18 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 ; NEON-LABEL: nbsl_v8i16:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
-; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    and v3.16b, v2.16b, v1.16b
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    orn v1.16b, v3.16b, v1.16b
+; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    and v3.16b, v2.16b, v1.16b
+; SVE2-NEXT:    and v0.16b, v2.16b, v0.16b
+; SVE2-NEXT:    orn v1.16b, v3.16b, v1.16b
+; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; SVE2-NEXT:    ret
   %4 = and <8 x i16> %2, %0
   %5 = xor <8 x i16> %2, splat (i16 -1)
@@ -304,17 +312,19 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
 ; NEON-LABEL: nbsl_v4i32:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    bic v1.16b, v1.16b, v2.16b
 ; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v4i32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    bic v1.16b, v1.16b, v2.16b
+; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; SVE2-NEXT:    ret
   %4 = and <4 x i32> %2, %0
   %5 = xor <4 x i32> %2, splat (i32 -1)
@@ -471,16 +481,14 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
 define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
 ; NEON-LABEL: nor_q:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    orr v0.16b, v1.16b, v0.16b
-; NEON-NEXT:    mvn v0.16b, v0.16b
+; NEON-NEXT:    mvn v1.16b, v1.16b
+; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nor_q:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    mvn v1.16b, v1.16b
+; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; SVE2-NEXT:    ret
   %3 = or <2 x i64> %1, %0
   %4 = xor <2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
index ac0b8e89519dd..af7f9b6d471ad 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
@@ -117,10 +117,10 @@ entry:
 define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
 ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
 ; CHECK-NOFULLFP16:       // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT:    fcmgt s2, s0, s1
-; CHECK-NOFULLFP16-NEXT:    fcmgt s0, s1, s0
-; CHECK-NOFULLFP16-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NOFULLFP16-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NOFULLFP16-NEXT:    fcmgt s2, s1, s0
+; CHECK-NOFULLFP16-NEXT:    fcmgt s0, s0, s1
+; CHECK-NOFULLFP16-NEXT:    mvn v1.8b, v2.8b
+; CHECK-NOFULLFP16-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; CHECK-NOFULLFP16-NEXT:    ret
 ;
 ; CHECK-NONANS-LABEL: dup_v1i32_ueq:
@@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
 ;
 ; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
 ; CHECK-FULLFP16:       // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT:    fcmgt s2, s0, s1
-; CHECK-FULLFP16-NEXT:    fcmgt s0, s1, s0
-; CHECK-FULLFP16-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-FULLFP16-NEXT:    mvn v0.8b, v0.8b
+; CHECK-FULLFP16-NEXT:    fcmgt s2, s1, s0
+; CHECK-FULLFP16-NEXT:    fcmgt s0, s0, s1
+; CHECK-FULLFP16-NEXT:    mvn v1.8b, v2.8b
+; CHECK-FULLFP16-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; CHECK-FULLFP16-NEXT:    ret
 entry:
   %0 = fcmp ueq float %a, %b
@@ -260,10 +260,10 @@ entry:
 define <1 x float> @dup_v1i32_uno(float %a, float %b) {
 ; CHECK-LABEL: dup_v1i32_uno:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcmge s2, s0, s1
-; CHECK-NEXT:    fcmgt s0, s1, s0
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    fcmgt s2, s1, s0
+; CHECK-NEXT:    fcmge s0, s0, s1
+; CHECK-NEXT:    mvn v1.8b, v2.8b
+; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
 ; CHECK-NEXT:    ret
 entry:
   %0 = fcmp uno float %a, %b
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index 04124609eec74..f459cc2d78442 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -276,18 +276,23 @@ define <2 x i64> @v2i64(<2 x i64> %d) {
 ; CHECK-SD-LABEL: v2i64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #1
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #2
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #4
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #8
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #16
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #32
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    orr v2.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
+; CHECK-SD-NEXT:    ushr v3.2d, v2.2d, #2
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    orr v2.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT:    ushr v4.2d, v2.2d, #4
+; CHECK-SD-NEXT:    orr v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT:    ushr v1.2d, v2.2d, #8
+; CHECK-SD-NEXT:    orr v2.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ushr v3.2d, v2.2d, #16
+; CHECK-SD-NEXT:    orr v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT:    ushr v1.2d, v1.2d, #32
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-SD-NEXT:    uaddlp v0.8h, v0.16b
 ; CHECK-SD-NEXT:    uaddlp v0.4s, v0.8h
@@ -314,34 +319,44 @@ define <3 x i64> @v3i64(<3 x i64> %d) {
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    ushr v4.2d, v2.2d, #1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    orr v6.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT:    mvn v2.16b, v2.16b
 ; CHECK-SD-NEXT:    ushr v1.2d, v0.2d, #1
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ushr v1.2d, v2.2d, #1
-; CHECK-SD-NEXT:    ushr v3.2d, v0.2d, #2
-; CHECK-SD-NEXT:    orr v1.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #2
-; CHECK-SD-NEXT:    ushr v3.2d, v0.2d, #4
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #4
-; CHECK-SD-NEXT:    ushr v3.2d, v0.2d, #8
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #8
-; CHECK-SD-NEXT:    ushr v3.2d, v0.2d, #16
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #16
-; CHECK-SD-NEXT:    ushr v3.2d, v0.2d, #32
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #32
+; CHECK-SD-NEXT:    ushr v7.2d, v6.2d, #2
+; CHECK-SD-NEXT:    bic v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT:    orr v3.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    orr v6.16b, v6.16b, v7.16b
+; CHECK-SD-NEXT:    bic v2.16b, v2.16b, v7.16b
+; CHECK-SD-NEXT:    ushr v5.2d, v3.2d, #2
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ushr v17.2d, v6.2d, #4
+; CHECK-SD-NEXT:    orr v3.16b, v3.16b, v5.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v5.16b
+; CHECK-SD-NEXT:    orr v6.16b, v6.16b, v17.16b
+; CHECK-SD-NEXT:    bic v2.16b, v2.16b, v17.16b
+; CHECK-SD-NEXT:    ushr v16.2d, v3.2d, #4
+; CHECK-SD-NEXT:    ushr v4.2d, v6.2d, #8
+; CHECK-SD-NEXT:    orr v3.16b, v3.16b, v16.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v16.16b
+; CHECK-SD-NEXT:    orr v6.16b, v6.16b, v4.16b
+; CHECK-SD-NEXT:    bic v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT:    ushr v1.2d, v3.2d, #8
+; CHECK-SD-NEXT:    orr v3.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ushr v5.2d, v3.2d, #16
+; CHECK-SD-NEXT:    orr v1.16b, v3.16b, v5.16b
+; CHECK-SD-NEXT:    ushr v3.2d, v6.2d, #16
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v5.16b
+; CHECK-SD-NEXT:    ushr v1.2d, v1.2d, #32
+; CHECK-SD-NEXT:    orr v4.16b, v6.16b, v3.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    bic v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT:    ushr v2.2d, v4.2d, #32
 ; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
-; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
 ; CHECK-SD-NEXT:    uaddlp v0.8h, v0.16b
 ; CHECK-SD-NEXT:    uaddlp v0.4s, v0.8h
@@ -377,30 +392,40 @@ define <4 x i64> @v4i64(<4 x i64> %d) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #1
 ; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #1
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #2
-; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #2
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #4
-; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #4
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #8
-; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #8
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #16
-; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #16
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT:    ushr v2.2d, v0.2d, #32
-; CHECK-SD-NEXT:    ushr v3.2d, v1.2d, #32
-; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT:    orr v4.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    orr v5.16b, v1.16b, v3.16b
 ; CHECK-SD-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-SD-NEXT:    mvn v1.16b, v1.16b
+; CHECK-SD-NEXT:    ushr v6.2d, v4.2d, #2
+; CHECK-SD-NEXT:    ushr v7.2d, v5.2d, #2
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT:    orr v4.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    orr v5.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v6.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT:    ushr v16.2d, v4.2d, #4
+; CHECK-SD-NEXT:    ushr v17.2d, v5.2d, #4
+; CHECK-SD-NEXT:    orr v4.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT:    orr v5.16b, v5.16b, v17.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v16.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v17.16b
+; CHECK-SD-NEXT:    ushr v2.2d, v4.2d, #8
+; CHECK-SD-NEXT:    ushr v3.2d, v5.2d, #8
+; CHECK-SD-NEXT:    orr v4.16b, v4.16b, v2.16b
+; CHECK-SD-NEXT:    orr v5.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT:    ushr v6.2d, v4.2d, #16
+; CHECK-SD-NEXT:    ushr v7.2d, v5.2d, #16
+; CHECK-SD-NEXT:    orr v2.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    orr v3.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v6.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT:    ushr v2.2d, v2.2d, #32
+; CHECK-SD-NEXT:    ushr v3.2d, v3.2d, #32
+; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT:    bic v1.16b, v1.16b, v3.16b
 ; CHECK-SD-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-SD-NEXT:    cnt v1.16b, v1.16b
 ; CHECK-SD-NEXT:    uaddlp v0.8h, v0.16b
diff --git a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll
index 8b31cbfe16b1a..ea0e0122d9b6d 100644
--- a/llvm/test/CodeGen/AArch64/eon.ll
+++ b/llvm/test/CodeGen/AArch64/eon.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc %s -pass-remarks-missed=gisel* -mtriple=aarch64-none-linux-gnu -global-isel -o - 2>&1 | FileCheck %s
 
@@ -6,8 +7,9 @@
 ; Check that the eon instruction is generated instead of eor,movn
 define i64 @test1(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test1:
-; CHECK: eon
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eon x0, x0, x1, lsl #4
+; CHECK-NEXT:    ret
 entry:
   %shl = shl i64 %b, 4
   %neg = xor i64 %a, -1
@@ -18,10 +20,11 @@ entry:
 ; Same check with multiple uses of %neg
 define i64 @test2(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test2:
-; CHECK: eon
-; CHECK: eon
-; CHECK: lsl
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eon x8, x0, x1, lsl #4
+; CHECK-NEXT:    eon x9, x2, x1, lsl #4
+; CHECK-NEXT:    lsl x0, x8, x9
+; CHECK-NEXT:    ret
 entry:
   %shl = shl i64 %b, 4
   %neg = xor i64 %shl, -1
@@ -33,9 +36,6 @@ entry:
 
 ; Check that eon is generated if the xor is a disjoint or.
 define i64 @disjoint_or(i64 %a, i64 %b) {
-; CHECK-LABEL: disjoint_or:
-; CHECK: eon
-; CHECK: ret
   %or = or disjoint i64 %a, %b
   %eon = xor i64 %or, -1
   ret i64 %eon
@@ -43,10 +43,6 @@ define i64 @disjoint_or(i64 %a, i64 %b) {
 
 ; Check that eon is *not* generated if the or is not disjoint.
 define i64 @normal_or(i64 %a, i64 %b) {
-; CHECK-LABEL: normal_or:
-; CHECK: orr
-; CHECK: mvn
-; CHECK: ret
   %or = or i64 %a, %b
   %not = xor i64 %or, -1
   ret i64 %not
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 6233ce743b706..529b76cf84906 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: test_fcmp_ueq:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcmgt v2.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-NEXT:    mvn v0.8b, v0.8b
-; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
+; CHECK-FP16-SD:       // %bb.0:
+; CHECK-FP16-SD-NEXT:    fcmgt v2.4h, v1.4h, v0.4h
+; CHECK-FP16-SD-NEXT:    fcmgt v0.4h, v0.4h, v1.4h
+; CHECK-FP16-SD-NEXT:    mvn v1.8b, v2.8b
+; CHECK-FP16-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -581,6 +581,14 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-CVT-GI-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
+; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    fcmgt v2.4h, v0.4h, v1.4h
+; CHECK-FP16-GI-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-FP16-GI-NEXT:    ret
 
   %1 = fcmp ueq <4 x half> %a, %b
   ret <4 x i1> %1
@@ -714,13 +722,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: test_fcmp_uno:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcmge v2.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-NEXT:    mvn v0.8b, v0.8b
-; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-SD-LABEL: test_fcmp_uno:
+; CHECK-FP16-SD:       // %bb.0:
+; CHECK-FP16-SD-NEXT:    fcmgt v2.4h, v1.4h, v0.4h
+; CHECK-FP16-SD-NEXT:    fcmge v0.4h, v0.4h, v1.4h
+; CHECK-FP16-SD-NEXT:    mvn v1.8b, v2.8b
+; CHECK-FP16-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_uno:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -732,6 +740,14 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-CVT-GI-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_uno:
+; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    fcmge v2.4h, v0.4h, v1.4h
+; CHECK-FP16-GI-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-GI-NEXT:    mvn v0.8b, v0.8b
+; CHECK-FP16-GI-NEXT:    ret
 
   %1 = fcmp uno <4 x half> %a, %b
   ret <4 x i1> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 86763eb5f9e3b..6d67fc9ebe1c6 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: test_fcmp_ueq:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcmgt v2.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    mvn v0.16b, v0.16b
-; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
+; CHECK-FP16-SD:       // %bb.0:
+; CHECK-FP16-SD-NEXT:    fcmgt v2.8h, v1.8h, v0.8h
+; CHECK-FP16-SD-NEXT:    fcmgt v0.8h, v0.8h, v1.8h
+; CHECK-FP16-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-FP16-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -1016,6 +1016,15 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-CVT-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
+; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    fcmgt v2.8h, v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-FP16-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-GI-NEXT:    ret
   %1 = fcmp ueq <8 x half> %a, %b
   ret <8 x i1> %1
 }
@@ -1190,14 +1199,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: test_fcmp_uno:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcmge v2.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    mvn v0.16b, v0.16b
-; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-SD-LABEL: test_fcmp_uno:
+; CHECK-FP16-SD:       // %bb.0:
+; CHECK-FP16-SD-NEXT:    fcmgt v2.8h, v1.8h, v0.8h
+; CHECK-FP16-SD-NEXT:    fcmge v0.8h, v0.8h, v1.8h
+; CHECK-FP16-SD-NEXT:    mvn v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-FP16-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_uno:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -1216,6 +1225,15 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-CVT-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_uno:
+; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    fcmge v2.8h, v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-GI-NEXT:    mvn v0.16b, v0.16b
+; CHECK-FP16-GI-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-GI-NEXT:    ret
   %1 = fcmp uno <8 x half> %a, %b
   ret <8 x i1> %1
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 6cfe66eb8e633..80293388a5cf9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -46,7 +46,9 @@ define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-LABEL: nbsl_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, #127 // =0x7f
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    and z1.b, z1.b, #0x80
+; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT:    bic z0.d, z2.d, z1.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 16 x i8> %a, splat(i8 127)
   %2 = and <vscale x 16 x i8> %b, splat(i8 -128)
@@ -59,7 +61,9 @@ define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 ; CHECK-LABEL: nbsl_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT:    bic z0.d, z2.d, z1.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 8 x i16> %a, splat(i16 32767)
   %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
@@ -72,7 +76,9 @@ define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 ; CHECK-LABEL: nbsl_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.s, #0x7fffffff
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT:    bic z0.d, z2.d, z1.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
   %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
@@ -85,7 +91,9 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 ; CHECK-LABEL: nbsl_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT:    bic z0.d, z2.d, z1.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
   %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
@@ -115,7 +123,9 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
 define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
 ; CHECK-LABEL: codegen_nbsl_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    bic z1.d, z1.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 16 x i8> %2, %0
   %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -165,7 +175,9 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
 define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
 ; CHECK-LABEL: codegen_nbsl_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    bic z1.d, z1.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 8 x i16> %2, %0
   %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -215,7 +227,9 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
 define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
 ; CHECK-LABEL: codegen_nbsl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    bic z1.d, z1.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 4 x i32> %2, %0
   %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -265,7 +279,9 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
 define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
 ; CHECK-LABEL: codegen_nbsl_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    bic z1.d, z1.d, z2.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 2 x i64> %2, %0
   %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -341,7 +357,9 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
 define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: nor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    eor z1.d, z1.d, z2.d
+; CHECK-NEXT:    bic z0.d, z1.d, z0.d
 ; CHECK-NEXT:    ret
   %3 = or <vscale x 2 x i64> %1, %0
   %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
>From 643e4d53a6884f33b960c1fca389422999611d01 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:56:14 +0000
Subject: [PATCH 14/20] [X86]: Updated tests
---
 llvm/test/CodeGen/X86/abds-vector-128.ll      |   6 +-
 .../test/CodeGen/X86/avx512-mask-bit-manip.ll |  25 +-
 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 174 ------------
 llvm/test/CodeGen/X86/bool-ext-inc.ll         |   7 +-
 llvm/test/CodeGen/X86/combine-or.ll           |  39 ++-
 llvm/test/CodeGen/X86/combine-srl.ll          |   6 +-
 .../CodeGen/X86/expand-vp-int-intrinsics.ll   |   6 +-
 llvm/test/CodeGen/X86/icmp-abs-C-vec.ll       | 105 +++----
 llvm/test/CodeGen/X86/icmp-pow2-diff.ll       |  54 ++--
 llvm/test/CodeGen/X86/ispow2.ll               |  24 +-
 llvm/test/CodeGen/X86/machine-cp.ll           |  67 ++---
 llvm/test/CodeGen/X86/mul-cmp.ll              |  16 +-
 llvm/test/CodeGen/X86/promote-cmp.ll          |  34 +--
 llvm/test/CodeGen/X86/sat-add.ll              |  10 +-
 llvm/test/CodeGen/X86/setcc-combine.ll        |   6 +-
 llvm/test/CodeGen/X86/setcc-logic.ll          |   7 +-
 .../CodeGen/X86/srem-seteq-vec-nonsplat.ll    |  16 +-
 llvm/test/CodeGen/X86/sshl_sat_vec.ll         |   6 +-
 ...-masked-merge-vector-variablemask-const.ll |  15 +-
 .../X86/urem-seteq-vec-tautological.ll        |  12 +-
 llvm/test/CodeGen/X86/vec_cmp_sint-128.ll     |  48 ++--
 llvm/test/CodeGen/X86/vec_cmp_uint-128.ll     |  48 ++--
 llvm/test/CodeGen/X86/vec_compare.ll          |  24 +-
 llvm/test/CodeGen/X86/vec_ctbits.ll           |  12 +-
 llvm/test/CodeGen/X86/vec_setcc-2.ll          |  13 +-
 llvm/test/CodeGen/X86/vector-lzcnt-128.ll     |  96 +++----
 llvm/test/CodeGen/X86/vector-lzcnt-512.ll     | 266 +++++++++---------
 llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll  |   6 +-
 llvm/test/CodeGen/X86/vector-popcnt-128.ll    |  10 +-
 llvm/test/CodeGen/X86/vector-unsigned-cmp.ll  |  24 +-
 llvm/test/CodeGen/X86/vsplit-and.ll           |  22 +-
 31 files changed, 534 insertions(+), 670 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 148be83892b72..bc57a31f063b5 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
 ; SSE2-NEXT:    pand %xmm6, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    paddq %xmm4, %xmm0
 ; SSE2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
index 3fcfb9d278da7..37df42ea2682d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
@@ -714,18 +714,19 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
 ; AVX512F-NEXT:    vpmovmskb %ymm4, %ecx
 ; AVX512F-NEXT:    shlq $32, %rcx
 ; AVX512F-NEXT:    leaq (%rax,%rcx), %rdx
-; AVX512F-NEXT:    addq %rcx, %rax
-; AVX512F-NEXT:    addq $-1, %rax
-; AVX512F-NEXT:    andnq %rax, %rdx, %rax
-; AVX512F-NEXT:    movq %rax, %rcx
-; AVX512F-NEXT:    movl %eax, %edx
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    shrq $32, %rax
-; AVX512F-NEXT:    shrq $48, %rcx
-; AVX512F-NEXT:    shrl $16, %edx
-; AVX512F-NEXT:    kmovw %edx, %k2
-; AVX512F-NEXT:    kmovw %ecx, %k3
-; AVX512F-NEXT:    kmovw %eax, %k4
+; AVX512F-NEXT:    addq $-1, %rdx
+; AVX512F-NEXT:    notq %rcx
+; AVX512F-NEXT:    andnq %rcx, %rax, %rax
+; AVX512F-NEXT:    andq %rax, %rdx
+; AVX512F-NEXT:    movq %rdx, %rax
+; AVX512F-NEXT:    movl %edx, %ecx
+; AVX512F-NEXT:    kmovw %edx, %k1
+; AVX512F-NEXT:    shrq $32, %rdx
+; AVX512F-NEXT:    shrq $48, %rax
+; AVX512F-NEXT:    shrl $16, %ecx
+; AVX512F-NEXT:    kmovw %ecx, %k2
+; AVX512F-NEXT:    kmovw %eax, %k3
+; AVX512F-NEXT:    kmovw %edx, %k4
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
deleted file mode 100644
index 7f3a376b24b2a..0000000000000
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
-
-define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITH-BMI:       # %bb.0:
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT:    notl %ecx
-; X86-WITH-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT:    retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITH-BMI:       # %bb.0:
-; X64-WITH-BMI-NEXT:    notl %edi
-; X64-WITH-BMI-NEXT:    andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT:    retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITHOUT-BMI:       # %bb.0:
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    notl %eax
-; X86-WITHOUT-BMI-NEXT:    retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITHOUT-BMI:       # %bb.0:
-; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT:    notl %eax
-; X64-WITHOUT-BMI-NEXT:    retq
-  %temp = or i32 %b, %a
-  %res = xor i32 %temp, -1
-  ret i32 %res
-}
-
-define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITH-BMI:       # %bb.0:
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT:    notl %edx
-; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %ecx
-; X86-WITH-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT:    retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITH-BMI:       # %bb.0:
-; X64-WITH-BMI-NEXT:    notl %edi
-; X64-WITH-BMI-NEXT:    andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT:    andnl %eax, %edx, %eax
-; X64-WITH-BMI-NEXT:    retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITHOUT-BMI:       # %bb.0:
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    notl %eax
-; X86-WITHOUT-BMI-NEXT:    retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITHOUT-BMI:       # %bb.0:
-; X64-WITHOUT-BMI-NEXT:    movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT:    orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT:    orl %edx, %eax
-; X64-WITHOUT-BMI-NEXT:    notl %eax
-; X64-WITHOUT-BMI-NEXT:    retq
-  %and.demorgan = or i32 %b, %a
-  %and3.demorgan = or i32 %and.demorgan, %c
-  %and3 = xor i32 %and3.demorgan, -1
-  ret i32 %and3
-}
-
-define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITH-BMI:       # %bb.0:
-; X86-WITH-BMI-NEXT:    pushl %esi
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT:    notl %edx
-; X86-WITH-BMI-NEXT:    andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT:    notl %esi
-; X86-WITH-BMI-NEXT:    andnl %esi, %ecx, %edx
-; X86-WITH-BMI-NEXT:    popl %esi
-; X86-WITH-BMI-NEXT:    retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITH-BMI:       # %bb.0:
-; X64-WITH-BMI-NEXT:    notq %rdi
-; X64-WITH-BMI-NEXT:    andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT:    retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITHOUT-BMI:       # %bb.0:
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    notl %eax
-; X86-WITHOUT-BMI-NEXT:    notl %edx
-; X86-WITHOUT-BMI-NEXT:    retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITHOUT-BMI:       # %bb.0:
-; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT:    notq %rax
-; X64-WITHOUT-BMI-NEXT:    retq
-  %temp = or i64 %b, %a
-  %res = xor i64 %temp, -1
-  ret i64 %res
-}
-
-define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITH-BMI:       # %bb.0:
-; X86-WITH-BMI-NEXT:    pushl %ebx
-; X86-WITH-BMI-NEXT:    pushl %edi
-; X86-WITH-BMI-NEXT:    pushl %esi
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-WITH-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-WITH-BMI-NEXT:    notl %edi
-; X86-WITH-BMI-NEXT:    andnl %edi, %edx, %edx
-; X86-WITH-BMI-NEXT:    andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT:    notl %ebx
-; X86-WITH-BMI-NEXT:    andnl %ebx, %esi, %edx
-; X86-WITH-BMI-NEXT:    andnl %edx, %ecx, %edx
-; X86-WITH-BMI-NEXT:    popl %esi
-; X86-WITH-BMI-NEXT:    popl %edi
-; X86-WITH-BMI-NEXT:    popl %ebx
-; X86-WITH-BMI-NEXT:    retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITH-BMI:       # %bb.0:
-; X64-WITH-BMI-NEXT:    notq %rdi
-; X64-WITH-BMI-NEXT:    andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT:    andnq %rax, %rdx, %rax
-; X64-WITH-BMI-NEXT:    retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITHOUT-BMI:       # %bb.0:
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT:    notl %eax
-; X86-WITHOUT-BMI-NEXT:    notl %edx
-; X86-WITHOUT-BMI-NEXT:    retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITHOUT-BMI:       # %bb.0:
-; X64-WITHOUT-BMI-NEXT:    movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT:    orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT:    orq %rdx, %rax
-; X64-WITHOUT-BMI-NEXT:    notq %rax
-; X64-WITHOUT-BMI-NEXT:    retq
-  %and.demorgan = or i64 %b, %a
-  %and3.demorgan = or i64 %and.demorgan, %c
-  %and3 = xor i64 %and3.demorgan, -1
-  ret i64 %and3
-}
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index 088b0ce857f20..d89893f94bdae 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -88,8 +88,11 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrld $31, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne <4 x i32> %a, %b
   %cmp2 = icmp ne <4 x i32> %c, %d
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8c91274abf3dd..8d5bbb4ae8e1e 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -183,14 +183,32 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
 }
 
 define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
-; CHECK-LABEL: or_build_pair_not:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    shlq $32, %rsi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    retq
+; SSE-LABEL: or_build_pair_not:
+; SSE:       # %bb.0:
+; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE-NEXT:    shlq $32, %rsi
+; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    orq %rsi, %rax
+; SSE-NEXT:    notq %rax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: or_build_pair_not:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX1-NEXT:    shlq $32, %rsi
+; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    orq %rsi, %rax
+; AVX1-NEXT:    notq %rax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: or_build_pair_not:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT:    shlq $32, %rsi
+; AVX2-NEXT:    notq %rsi
+; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    andnq %rsi, %rax, %rax
+; AVX2-NEXT:    retq
   %n0 = xor i32 %a0, -1
   %n1 = xor i32 %a1, -1
   %x0 = zext i32 %n0 to i64
@@ -262,10 +280,9 @@ define i64 @PR89533(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    orq %rax, %rcx
 ; AVX2-NEXT:    notq %rcx
-; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    tzcntq %rcx, %rax
+; AVX2-NEXT:    andnq %rcx, %rax, %rax
+; AVX2-NEXT:    tzcntq %rax, %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
   %cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 7bc90534dcc6e..4e31177023b08 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -440,9 +440,9 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $16, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index dbfa69d497698..7919495821efd 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -1490,9 +1490,9 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; SSE-NEXT:    por %xmm1, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrld $16, %xmm1
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrlw $1, %xmm1
 ; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 23dcf334124c0..f59e53687ff74 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -640,8 +640,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
 ; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
@@ -650,17 +650,18 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE41-LABEL: ne_and_to_abs_vec4x64:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm1, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT:    por %xmm3, %xmm0
-; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
 ; SSE41-NEXT:    packssdw %xmm1, %xmm0
-; SSE41-NEXT:    pxor %xmm4, %xmm0
+; SSE41-NEXT:    pxor %xmm3, %xmm0
+; SSE41-NEXT:    pandn %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x64:
@@ -681,8 +682,9 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; SSE2-NEXT:    andps %xmm4, %xmm0
-; SSE2-NEXT:    orps %xmm2, %xmm0
 ; SSE2-NEXT:    xorps %xmm3, %xmm0
+; SSE2-NEXT:    andnps %xmm0, %xmm2
+; SSE2-NEXT:    movaps %xmm2, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
   %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
@@ -706,51 +708,51 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
 ; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm1, %xmm3
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT:    pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT:    por %xmm3, %xmm0
-; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT:    pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
 ; SSE41-NEXT:    packssdw %xmm1, %xmm0
-; SSE41-NEXT:    pxor %xmm4, %xmm0
-; SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pxor %xmm3, %xmm0
+; SSE41-NEXT:    pandn %xmm0, %xmm2
+; SSE41-NEXT:    pmovsxdq %xmm2, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
 ; SSE41-NEXT:    pslld $31, %xmm1
 ; SSE41-NEXT:    psrad $31, %xmm1
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [129,129]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
-; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE2-NEXT:    andps %xmm4, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; SSE2-NEXT:    andps %xmm4, %xmm0
 ; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE2-NEXT:    andps %xmm4, %xmm0
-; SSE2-NEXT:    orps %xmm2, %xmm0
-; SSE2-NEXT:    xorps %xmm3, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT:    andps %xmm4, %xmm2
+; SSE2-NEXT:    xorps %xmm3, %xmm2
+; SSE2-NEXT:    andnps %xmm2, %xmm0
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
@@ -868,8 +870,9 @@ define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -909,8 +912,9 @@ define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1031,8 +1035,8 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
 ; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -1042,21 +1046,22 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
 ; SSE41-NEXT:    pcmpeqb %xmm0, %xmm1
 ; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT:    pandn %xmm0, %xmm1
+; SSE41-NEXT:    pmovsxbd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
   %cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
@@ -1087,8 +1092,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1097,8 +1102,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; SSE41-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pandn %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1107,8 +1113,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; SSE2-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
   %cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
index dada1726be424..3fc2a323b5dc1 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
@@ -151,7 +151,7 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
 ; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = ~xmm2 & (xmm0 ^ xmm1)
 ; AVX512-NEXT:    retq
 ;
 ; AVX2-LABEL: andnot_ne_v8i16_todo_no_splat:
@@ -159,18 +159,19 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
 ; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; SSE-LABEL: andnot_ne_v8i16_todo_no_splat:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqw %xmm1, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
 ; SSE-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
   %cmp1 = icmp ne <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %cmp2 = icmp ne <8 x i16> %x, <i16 -16385, i16 -257, i16 -33, i16 -8193, i16 -16385, i16 -257, i16 -33, i16 -8193>
@@ -184,7 +185,7 @@ define <8 x i1> @andnot_ne_v8i16(<8 x i16> %x) nounwind {
 ; AVX512-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = ~xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX2-LABEL: andnot_ne_v8i16:
@@ -215,28 +216,29 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
 ; AVX512-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    vpandn %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX2-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; SSE-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
   %cmp1 = icmp ne <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %cmp2 = icmp ne <16 x i8> %x, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
@@ -250,7 +252,7 @@ define <16 x i1> @andnot_ne_v16i8(<16 x i8> %x) nounwind {
 ; AVX512-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = ~xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX2-LABEL: andnot_ne_v16i8:
@@ -309,7 +311,7 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $86, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = ~xmm1 & (xmm0 ^ xmm2)
 ; AVX512-NEXT:    retq
 ;
 ; AVX2-LABEL: addand_ne_v8i16_fail:
@@ -317,8 +319,8 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; SSE41-LABEL: addand_ne_v8i16_fail:
@@ -327,8 +329,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; SSE41-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pandn %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE2-LABEL: addand_ne_v8i16_fail:
@@ -337,8 +340,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; SSE2-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
   %cmp1 = icmp ne <8 x i16> %x, <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>
   %cmp2 = icmp ne <8 x i16> %x, <i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381>
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index badfd1af940ca..478d80e9827a5 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -179,19 +179,23 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
 ; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm6
 ; CHECK-NOBMI-NEXT:    pcmpgtd %xmm4, %xmm6
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NOBMI-NEXT:    pand %xmm6, %xmm1
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
+; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm4
+; CHECK-NOBMI-NEXT:    pandn %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT:    pxor %xmm5, %xmm3
 ; CHECK-NOBMI-NEXT:    pxor %xmm3, %xmm0
-; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm5
-; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm5
-; CHECK-NOBMI-NEXT:    movdqa %xmm5, %xmm7
-; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
+; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm4
 ; CHECK-NOBMI-NEXT:    pcmpeqd %xmm3, %xmm0
-; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; CHECK-NOBMI-NEXT:    andps %xmm7, %xmm0
-; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
-; CHECK-NOBMI-NEXT:    orps %xmm5, %xmm0
-; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NOBMI-NEXT:    pand %xmm4, %xmm0
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm3
+; CHECK-NOBMI-NEXT:    pandn %xmm3, %xmm0
+; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll
index c84a1159ad56a..0713f0bbe244c 100644
--- a/llvm/test/CodeGen/X86/machine-cp.ll
+++ b/llvm/test/CodeGen/X86/machine-cp.ll
@@ -100,55 +100,38 @@ define <16 x float> @foo(<16 x float> %x) {
 ; CHECK-LABEL: foo:
 ; CHECK:       ## %bb.0: ## %bb
 ; CHECK-NEXT:    xorps %xmm5, %xmm5
-; CHECK-NEXT:    cvttps2dq %xmm3, %xmm8
+; CHECK-NEXT:    cvttps2dq %xmm3, %xmm6
 ; CHECK-NEXT:    movaps %xmm3, %xmm4
 ; CHECK-NEXT:    cmpltps %xmm5, %xmm4
-; CHECK-NEXT:    movaps {{.*#+}} xmm7 = [13,14,15,16]
-; CHECK-NEXT:    movaps %xmm4, %xmm6
-; CHECK-NEXT:    orps %xmm7, %xmm6
-; CHECK-NEXT:    cvtdq2ps %xmm8, %xmm3
-; CHECK-NEXT:    andps %xmm7, %xmm3
-; CHECK-NEXT:    andps %xmm6, %xmm3
-; CHECK-NEXT:    andnps %xmm4, %xmm6
-; CHECK-NEXT:    cvttps2dq %xmm2, %xmm4
+; CHECK-NEXT:    cvttps2dq %xmm2, %xmm3
 ; CHECK-NEXT:    movaps %xmm2, %xmm7
 ; CHECK-NEXT:    cmpltps %xmm5, %xmm7
-; CHECK-NEXT:    movaps {{.*#+}} xmm8 = [9,10,11,12]
-; CHECK-NEXT:    movaps %xmm7, %xmm9
-; CHECK-NEXT:    orps %xmm8, %xmm9
-; CHECK-NEXT:    cvtdq2ps %xmm4, %xmm2
-; CHECK-NEXT:    andps %xmm8, %xmm2
-; CHECK-NEXT:    andps %xmm9, %xmm2
-; CHECK-NEXT:    andnps %xmm7, %xmm9
-; CHECK-NEXT:    cvttps2dq %xmm1, %xmm4
-; CHECK-NEXT:    cmpltps %xmm5, %xmm1
-; CHECK-NEXT:    movaps {{.*#+}} xmm7 = [5,6,7,8]
+; CHECK-NEXT:    cvttps2dq %xmm1, %xmm2
 ; CHECK-NEXT:    movaps %xmm1, %xmm8
-; CHECK-NEXT:    orps %xmm7, %xmm8
-; CHECK-NEXT:    cvtdq2ps %xmm4, %xmm4
-; CHECK-NEXT:    andps %xmm7, %xmm4
-; CHECK-NEXT:    andps %xmm8, %xmm4
-; CHECK-NEXT:    andnps %xmm1, %xmm8
+; CHECK-NEXT:    cmpltps %xmm5, %xmm8
 ; CHECK-NEXT:    cvttps2dq %xmm0, %xmm1
-; CHECK-NEXT:    cmpltps %xmm5, %xmm0
+; CHECK-NEXT:    movaps %xmm0, %xmm9
+; CHECK-NEXT:    cmpltps %xmm5, %xmm9
 ; CHECK-NEXT:    movaps {{.*#+}} xmm5 = [1,2,3,4]
-; CHECK-NEXT:    movaps %xmm0, %xmm7
-; CHECK-NEXT:    orps %xmm5, %xmm7
-; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT:    andps %xmm5, %xmm1
-; CHECK-NEXT:    andps %xmm7, %xmm1
-; CHECK-NEXT:    andnps %xmm0, %xmm7
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-NEXT:    andps %xmm0, %xmm7
-; CHECK-NEXT:    orps %xmm7, %xmm1
-; CHECK-NEXT:    andps %xmm0, %xmm8
-; CHECK-NEXT:    orps %xmm8, %xmm4
-; CHECK-NEXT:    andps %xmm0, %xmm9
-; CHECK-NEXT:    orps %xmm9, %xmm2
-; CHECK-NEXT:    andps %xmm0, %xmm6
-; CHECK-NEXT:    orps %xmm6, %xmm3
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    movaps %xmm4, %xmm1
+; CHECK-NEXT:    orps %xmm5, %xmm9
+; CHECK-NEXT:    movaps {{.*#+}} xmm10 = [5,6,7,8]
+; CHECK-NEXT:    orps %xmm10, %xmm8
+; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
+; CHECK-NEXT:    orps %xmm11, %xmm7
+; CHECK-NEXT:    movaps {{.*#+}} xmm12 = [13,14,15,16]
+; CHECK-NEXT:    orps %xmm12, %xmm4
+; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm0
+; CHECK-NEXT:    cvtdq2ps %xmm2, %xmm1
+; CHECK-NEXT:    cvtdq2ps %xmm3, %xmm2
+; CHECK-NEXT:    cvtdq2ps %xmm6, %xmm3
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    andps %xmm9, %xmm0
+; CHECK-NEXT:    andps %xmm10, %xmm1
+; CHECK-NEXT:    andps %xmm8, %xmm1
+; CHECK-NEXT:    andps %xmm11, %xmm2
+; CHECK-NEXT:    andps %xmm7, %xmm2
+; CHECK-NEXT:    andps %xmm12, %xmm3
+; CHECK-NEXT:    andps %xmm4, %xmm3
 ; CHECK-NEXT:    retq
 bb:
   %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/mul-cmp.ll b/llvm/test/CodeGen/X86/mul-cmp.ll
index 0ee4601acf694..4fffb42bdc672 100644
--- a/llvm/test/CodeGen/X86/mul-cmp.ll
+++ b/llvm/test/CodeGen/X86/mul-cmp.ll
@@ -119,21 +119,21 @@ define <4 x i1> @mul_nsw_ne0_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: mul_nsw_ne0_v4i32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: mul_nsw_ne0_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %m = mul nsw <4 x i32> %x, %y
   %r = icmp ne <4 x i32> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index 88934a382bbfa..aeb8fe93930a0 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -8,34 +8,36 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; SSE2-LABEL: PR45808:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm3, %xmm5
-; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    movdqa %xmm1, %xmm6
+; SSE2-NEXT:    movdqa %xmm3, %xmm6
 ; SSE2-NEXT:    pxor %xmm4, %xmm6
-; SSE2-NEXT:    movdqa %xmm6, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT:    movdqa %xmm1, %xmm7
+; SSE2-NEXT:    pxor %xmm4, %xmm7
+; SSE2-NEXT:    movdqa %xmm7, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm6, %xmm5
 ; SSE2-NEXT:    movdqa %xmm2, %xmm8
 ; SSE2-NEXT:    pxor %xmm4, %xmm8
 ; SSE2-NEXT:    pxor %xmm0, %xmm4
 ; SSE2-NEXT:    movdqa %xmm4, %xmm9
 ; SSE2-NEXT:    pcmpgtd %xmm8, %xmm9
 ; SSE2-NEXT:    movdqa %xmm9, %xmm10
-; SSE2-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2]
-; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2]
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
 ; SSE2-NEXT:    pcmpeqd %xmm8, %xmm4
-; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3]
 ; SSE2-NEXT:    andps %xmm10, %xmm4
-; SSE2-NEXT:    shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3]
-; SSE2-NEXT:    orps %xmm4, %xmm9
-; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
-; SSE2-NEXT:    pxor %xmm9, %xmm4
-; SSE2-NEXT:    pxor %xmm5, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3]
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm6
+; SSE2-NEXT:    pxor %xmm9, %xmm6
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3]
+; SSE2-NEXT:    pandn %xmm6, %xmm4
+; SSE2-NEXT:    pxor %xmm6, %xmm6
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
 ; SSE2-NEXT:    pand %xmm4, %xmm0
 ; SSE2-NEXT:    pandn %xmm2, %xmm4
 ; SSE2-NEXT:    por %xmm4, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE2-NEXT:    por %xmm7, %xmm2
 ; SSE2-NEXT:    pslld $31, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pand %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index b12be7cb129d3..37bc8ded142c1 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -1004,9 +1004,10 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
 ; SSE2-NEXT:    pxor %xmm1, %xmm4
 ; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
 ; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm4
 ; SSE2-NEXT:    pxor %xmm2, %xmm4
-; SSE2-NEXT:    por %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm4, %xmm2
+; SSE2-NEXT:    por %xmm2, %xmm0
 ; SSE2-NEXT:    paddd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -1147,9 +1148,10 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
 ; SSE2-NEXT:    por %xmm3, %xmm4
 ; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    por %xmm1, %xmm4
 ; SSE2-NEXT:    pxor %xmm2, %xmm4
-; SSE2-NEXT:    por %xmm4, %xmm0
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm4, %xmm2
+; SSE2-NEXT:    por %xmm2, %xmm0
 ; SSE2-NEXT:    paddq %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index f526db00df606..d97e603c636af 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    retq
   %na = xor <2 x i64> %a, <i64 -1, i64 -1>
   %nb = xor <2 x i64> %b, <i64 -1, i64 -1>
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index c98aae7fbf405..4b1225c7ac1d8 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -541,9 +541,10 @@ define <4 x i32> @and_icmps_const_1bit_diff_vec(<4 x i32> %x) {
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [44,60,44,60]
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm1
 ; CHECK-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm0, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %a = icmp ne <4 x i32> %x, <i32 44, i32 60, i32 44, i32 60>
   %b = icmp ne <4 x i32> %x, <i32 60, i32 44, i32 60, i32 44>
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 2d0778853fecd..aad6abfa78c23 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2401,16 +2401,16 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
 ; CHECK-AVX1-NEXT:    vpor %xmm5, %xmm3, %xmm3
 ; CHECK-AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
 ; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
-; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm4, %xmm3
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm4, %xmm4
+; CHECK-AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm3
 ; CHECK-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
 ; CHECK-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
 ; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
 ; CHECK-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; CHECK-AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
-; CHECK-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; CHECK-AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
 ; CHECK-AVX1-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: pr51133:
@@ -2450,10 +2450,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
 ; CHECK-AVX2-NEXT:    vpor %ymm3, %ymm4, %ymm3
 ; CHECK-AVX2-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
 ; CHECK-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; CHECK-AVX2-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
+; CHECK-AVX2-NEXT:    vpxor %ymm3, %ymm0, %ymm0
 ; CHECK-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
-; CHECK-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
-; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
-; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; CHECK-AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512VL-LABEL: pr51133:
diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 10dee14bdd1a0..82c157c207375 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -37,9 +37,9 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; X64-NEXT:    pand %xmm2, %xmm0
 ; X64-NEXT:    pxor %xmm5, %xmm5
 ; X64-NEXT:    pcmpgtd %xmm4, %xmm5
-; X64-NEXT:    por %xmm2, %xmm5
-; X64-NEXT:    pcmpeqd %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm5, %xmm2
+; X64-NEXT:    pcmpeqd %xmm4, %xmm4
+; X64-NEXT:    pxor %xmm5, %xmm4
+; X64-NEXT:    pandn %xmm4, %xmm2
 ; X64-NEXT:    por %xmm0, %xmm2
 ; X64-NEXT:    pandn %xmm2, %xmm1
 ; X64-NEXT:    por %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index 58fd6492f2ed5..00d122838dbc5 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -127,14 +127,21 @@ define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm0
-; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-XOP:       # %bb.0:
-; CHECK-XOP-NEXT:    vmovaps (%rdi), %xmm0
-; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
+; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
+; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, ptr%px, align 16
   %y = load <4 x i32>, ptr%py, align 16
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 84856aab85079..6e68b37bec98a 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE2-NEXT:    pand %xmm2, %xmm1
 ; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
+; CHECK-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm1
 ; CHECK-SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE2-NEXT:    retq
 ;
@@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE41-NEXT:    pand %xmm2, %xmm1
 ; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
-; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-SSE41-NEXT:    pxor %xmm0, %xmm1
+; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-SSE41-NEXT:    pxor %xmm0, %xmm2
+; CHECK-SSE41-NEXT:    pandn %xmm2, %xmm1
 ; CHECK-SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE41-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
index ac4b25be5eb65..25ba593d47062 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ne_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ne_v4i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ne_v8i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ne_v16i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -349,7 +349,7 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ge_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -388,7 +388,7 @@ define <4 x i32> @ge_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ge_v4i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -427,7 +427,7 @@ define <8 x i16> @ge_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ge_v8i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -466,7 +466,7 @@ define <16 x i8> @ge_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ge_v16i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: le_v2i64:
@@ -657,7 +657,7 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: le_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -696,7 +696,7 @@ define <4 x i32> @le_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: le_v4i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -735,7 +735,7 @@ define <8 x i16> @le_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: le_v8i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -774,7 +774,7 @@ define <16 x i8> @le_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: le_v16i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index 9a0756edbce32..bd730e7dbefbc 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ne_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ne_v4i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ne_v8i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ne_v16i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -535,7 +535,7 @@ define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm1
 ; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -594,7 +594,7 @@ define <4 x i32> @gt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -653,7 +653,7 @@ define <8 x i16> @gt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -696,7 +696,7 @@ define <16 x i8> @gt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: le_v2i64:
@@ -960,7 +960,7 @@ define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm1
 ; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1020,7 +1020,7 @@ define <4 x i32> @lt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1080,7 +1080,7 @@ define <8 x i16> @lt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1123,7 +1123,7 @@ define <16 x i8> @lt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index c1045c7b72f2c..0fc298a2b4cd4 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp sge <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp sle <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp uge <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp ule <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 370f88d644b57..048117dd43e66 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -52,9 +52,9 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlq $32, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -154,9 +154,9 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrld $16, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index 5a71878ea4579..ade6b5c8d6bdf 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -448,13 +448,14 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
 ; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    pand %xmm2, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pxor %xmm2, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    pandn %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ule_v2i64_splat:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index cfb5fac2fd7aa..990113b1ecc1e 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -33,9 +33,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlq $32, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -73,9 +73,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlq $32, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -311,9 +311,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlq $32, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -351,9 +351,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlq $32, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -586,9 +586,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $16, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -628,9 +628,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrld $16, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -835,9 +835,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $16, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -877,9 +877,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrld $16, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1081,9 +1081,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $8, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1117,9 +1117,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $8, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1289,9 +1289,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $8, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1325,9 +1325,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $8, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    pandn %xmm2, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1498,9 +1498,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pxor %xmm1, %xmm3
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1531,9 +1531,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
 ; SSE3-NEXT:    psrlw $4, %xmm1
 ; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE3-NEXT:    pand %xmm2, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE3-NEXT:    pxor %xmm1, %xmm3
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1661,9 +1661,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pxor %xmm1, %xmm3
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1694,9 +1694,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
 ; SSE3-NEXT:    psrlw $4, %xmm1
 ; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE3-NEXT:    pand %xmm2, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT:    pxor %xmm1, %xmm0
+; SSE3-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE3-NEXT:    pxor %xmm1, %xmm3
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index d35a365508d54..8c24aa50a626e 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -28,17 +28,17 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -56,28 +56,30 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpandn %ymm1, %ymm0, %ymm2
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm4
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm4
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm6
+; AVX512DQ-NEXT:    vpor %ymm6, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpandn %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm2, %ymm0
 ; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
   ret <8 x i64> %out
@@ -107,17 +109,17 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -135,28 +137,30 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpandn %ymm1, %ymm0, %ymm2
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm4
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm4
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm6
+; AVX512DQ-NEXT:    vpor %ymm6, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpandn %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm2, %ymm0
 ; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
   ret <8 x i64> %out
@@ -184,17 +188,17 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
@@ -214,34 +218,35 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpandn %ymm0, %ymm1, %ymm2
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm1 = ~zmm1
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
-; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512DQ-NEXT:    # ymm6 = mem[0,1,0,1]
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm6, %ymm6
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpackuswb %ymm6, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm6, %ymm3
+; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpandn %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
@@ -271,17 +276,17 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
@@ -301,34 +306,35 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpandn %ymm0, %ymm1, %ymm2
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm1 = ~zmm1
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm4
-; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
-; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512DQ-NEXT:    # ymm6 = mem[0,1,0,1]
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm6, %ymm6
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpackuswb %ymm6, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm6, %ymm3
+; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpandn %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
index a1b277efde6ff..1473da6aac5ea 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
@@ -20,9 +20,9 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrld $16, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index c1d30b6d5a995..d8e955c93581e 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) {
 ; SSE-NEXT:    pcmpgtd %xmm2, %xmm3
 ; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE-NEXT:    pand %xmm4, %xmm2
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: ne_1_v2i64:
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index 97124f0a9d8d9..55f2258aad018 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uge_v2i64:
@@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: uge_v2i64:
@@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ule_v2i64:
@@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pandn %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ule_v2i64:
diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 833db0efbda89..90bbde645cd08 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -7,9 +7,9 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pcmpeqq %xmm2, %xmm0
 ; CHECK-NEXT:    pcmpeqq %xmm2, %xmm1
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
@@ -32,19 +32,19 @@ define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
 ; CHECK-NEXT:    movq %rcx, %xmm0
 ; CHECK-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
 ; CHECK-NEXT:    pxor %xmm4, %xmm4
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm2
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm5, %xmm5
-; CHECK-NEXT:    pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT:    por %xmm2, %xmm1
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm2
+; CHECK-NEXT:    packssdw %xmm0, %xmm2
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT:    por %xmm0, %xmm3
+; CHECK-NEXT:    pcmpeqq %xmm4, %xmm1
 ; CHECK-NEXT:    packssdw %xmm3, %xmm1
-; CHECK-NEXT:    pxor %xmm5, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2]
+; CHECK-NEXT:    pxor %xmm0, %xmm1
+; CHECK-NEXT:    pandn %xmm1, %xmm2
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,2,2,2]
 ; CHECK-NEXT:    pslld $31, %xmm0
 ; CHECK-NEXT:    psrad $31, %xmm0
-; CHECK-NEXT:    pmovsxdq %xmm1, %xmm1
+; CHECK-NEXT:    pmovsxdq %xmm2, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rdi)
 ; CHECK-NEXT:    movq %xmm0, 16(%rdi)
 ; CHECK-NEXT:    retq
>From e2032efe50c3c421c98575d822442dfde65dab71 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:56:43 +0000
Subject: [PATCH 15/20] [PowerPC]: Updated tests
---
 llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll |   1 +
 .../CodeGen/PowerPC/fp-strict-fcmp-spe.ll     |  24 ++--
 .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll    |  19 ++-
 llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll  |  65 +++++++++
 .../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll  |  19 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-and.ll   |  19 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-b.ll     |  15 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-c.ll     |  15 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll   |  19 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-nor.ll   | 125 ++++++++++++++----
 .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll   |  19 ++-
 11 files changed, 268 insertions(+), 72 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index bea24ee98336d..ed8dc504f026a 100644
--- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \
 ; RUN:   grep eqv | count 3
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
index c20d319f2ac79..78644691fb646 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
@@ -113,14 +113,12 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
 define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
 ; SPE-LABEL: test_f32_ueq_s:
 ; SPE:       # %bb.0:
-; SPE-NEXT:    efscmplt cr0, r5, r6
-; SPE-NEXT:    bc 12, gt, .LBB7_3
-; SPE-NEXT:  # %bb.1:
 ; SPE-NEXT:    efscmpgt cr0, r5, r6
-; SPE-NEXT:    bc 12, gt, .LBB7_3
-; SPE-NEXT:  # %bb.2:
-; SPE-NEXT:    mr r4, r3
-; SPE-NEXT:  .LBB7_3:
+; SPE-NEXT:    bc 12, gt, .LBB7_2
+; SPE-NEXT:  # %bb.1:
+; SPE-NEXT:    efscmplt cr0, r5, r6
+; SPE-NEXT:    bclr 4, gt, 0
+; SPE-NEXT:  .LBB7_2:
 ; SPE-NEXT:    mr r3, r4
 ; SPE-NEXT:    blr
   %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
@@ -355,14 +353,12 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    evmergelo r7, r7, r8
 ; SPE-NEXT:    evmergelo r5, r5, r6
-; SPE-NEXT:    efdcmplt cr0, r5, r7
-; SPE-NEXT:    bc 12, gt, .LBB21_3
-; SPE-NEXT:  # %bb.1:
 ; SPE-NEXT:    efdcmpgt cr0, r5, r7
-; SPE-NEXT:    bc 12, gt, .LBB21_3
-; SPE-NEXT:  # %bb.2:
-; SPE-NEXT:    mr r4, r3
-; SPE-NEXT:  .LBB21_3:
+; SPE-NEXT:    bc 12, gt, .LBB21_2
+; SPE-NEXT:  # %bb.1:
+; SPE-NEXT:    efdcmplt cr0, r5, r7
+; SPE-NEXT:    bclr 4, gt, 0
+; SPE-NEXT:  .LBB21_2:
 ; SPE-NEXT:    mr r3, r4
 ; SPE-NEXT:    blr
   %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index c23daac80279b..872a08c20eae8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -1,29 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; Check the miscellaneous logical vector operations added in P8
-; 
+;
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; Test x eqv y
 define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_veqv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    veqv 2, 2, 3
+; CHECK-NEXT:    blr
        %tmp = xor <4 x i32> %x, %y
        %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
        ret <4 x i32> %ret_val
-; CHECK: veqv 2, 2, 3
 }
 
 ; Test x vnand y
 define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_vnand:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vnand 2, 2, 3
+; CHECK-NEXT:    blr
        %tmp = and <4 x i32> %x, %y
        %ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
        ret <4 x i32> %ret_val
-; CHECK: vnand 2, 2, 3
 }
 
 ; Test x vorc y and variants
 define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_vorc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vor 2, 3, 2
+; CHECK-NEXT:    blr
        %tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
        %tmp2 = or <4 x i32> %x, %tmp1
-; CHECK: vorc 3, 2, 3      
        %tmp3 = xor <4 x i32> %tmp2, <i32 -1, i32 -1, i32 -1, i32 -1>
        %tmp4 = or <4 x i32> %tmp3, %x
-; CHECK: vorc 2, 2, 3
        ret <4 x i32> %tmp4
 }
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
index ba74df956e71e..7f7a52fe7de65 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
 
@@ -6,6 +7,10 @@
 ; CHECK:         xxlandc v2, v2, v3
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_not:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlandc v2, v2, v3
+; CHECK-NEXT:    blr
 entry:
   %neg = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
   %and = and <4 x i32> %neg, %A
@@ -17,6 +22,10 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %A
   %and1 = and <16 x i8> %and, %C
@@ -28,6 +37,10 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %A
   %and1 = and <8 x i16> %and, %C
@@ -39,6 +52,10 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %A
   %and1 = and <4 x i32> %and, %C
@@ -50,6 +67,10 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %A
   %and1 = and <2 x i64> %and, %C
@@ -61,6 +82,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 14
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_nand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 14
+; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %C, %B
   %neg = xor <4 x i32> %and, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -73,6 +98,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 7
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_or:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 7
+; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %and = and <4 x i32> %or, %A
@@ -84,6 +113,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 8
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_nor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 8
+; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %neg = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -96,6 +129,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 6
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_xor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 6
+; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %and = and <4 x i32> %xor, %A
@@ -107,6 +144,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v3, v4, 9
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_eqv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 9
+; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
   %neg = xor <4 x i32> %xor, %C
@@ -119,6 +160,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 241
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_nand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 241
+; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %C, %B
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -131,6 +176,10 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 254
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_and:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v3, v2, v4, 254
+; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %A
   %and1 = and <4 x i32> %and, %C
@@ -143,6 +192,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 249
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_xor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 249
+; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %and = and <4 x i32> %xor, %A
@@ -155,6 +208,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 246
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_eqv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 246
+; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -167,6 +224,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 248
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_or:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v4, v3, 248
+; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %and = and <4 x i32> %or, %A
@@ -179,6 +240,10 @@ entry:
 ; CHECK:         xxeval v2, v2, v3, v4, 247
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_nor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 247
+; CHECK-NEXT:    blr
 entry:
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
   %or = or <4 x i32> %A.not, %B
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
index 6616a1e6e7e9f..ba5c9edb3897d 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
@@ -32,7 +32,10 @@ entry:
 define dso_local <8 x i16> @eqvA_B_C(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: eqvA_B_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 150
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxland vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %C
@@ -48,7 +51,8 @@ entry:
 define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: norA_andB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 224
+; CHECK-NEXT:    xxlnor vs0, v2, v2
+; CHECK-NEXT:    xxeval v2, vs0, v3, v4, 14
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -100,7 +104,8 @@ entry:
 define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: norA_xorB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 144
+; CHECK-NEXT:    xxlnor vs0, v2, v2
+; CHECK-NEXT:    xxeval v2, vs0, v3, v4, 9
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -113,7 +118,9 @@ entry:
 define dso_local <4 x i32> @norA_B_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: norA_B_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 128
+; CHECK-NEXT:    xxlnor vs0, v4, v4
+; CHECK-NEXT:    xxlnor vs1, v3, v3
+; CHECK-NEXT:    xxeval v2, v2, vs1, vs0, 16
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -164,7 +171,9 @@ entry:
 define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: orA_norB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 143
+; CHECK-NEXT:    xxlnor vs0, v4, v4
+; CHECK-NEXT:    xxlnor vs1, v3, v3
+; CHECK-NEXT:    xxeval v2, v2, vs1, vs0, 31
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index b41220b01373a..f98edc21bf2ea 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -80,9 +80,11 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -97,10 +99,13 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -115,9 +120,12 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxland vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -132,9 +140,12 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxland vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index a51e392279d55..0baa420b79761 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_nor_BC_B_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 54bf6c03f8c1a..6fc822d729457 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_nor_BC_C_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
index ba7680b27cc17..78ae36cc0ecf7 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
@@ -84,9 +84,11 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -102,10 +104,13 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxleqv vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -121,9 +126,12 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxleqv vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -139,9 +147,12 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxleqv vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
index 369587454a7c1..90928e668afd8 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
@@ -15,9 +15,11 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -32,10 +34,13 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -50,9 +55,12 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxland vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -67,9 +75,12 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxland vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %C
@@ -85,8 +96,9 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT:    xxsel v2, vs0, v3, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -100,10 +112,12 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_B_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT:    xxsel v2, vs0, v3, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -117,9 +131,11 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_B_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT:    xxsel v2, vs0, v3, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -133,9 +149,11 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_B_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT:    xxsel v2, vs0, v3, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
@@ -150,8 +168,9 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT:    xxsel v2, vs0, v4, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -165,10 +184,12 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_C_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT:    xxsel v2, vs0, v4, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -182,9 +203,11 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_C_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT:    xxsel v2, vs0, v4, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -198,9 +221,11 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_C_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT:    xxsel v2, vs0, v4, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
@@ -214,9 +239,11 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -231,10 +258,13 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -249,9 +279,12 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlxor vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <16 x i8> %B, %C
@@ -266,9 +299,12 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlxor vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <8 x i16> %B, %C
@@ -283,9 +319,11 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -300,10 +338,13 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -318,9 +359,12 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxlnor vs1, v4, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT:    xxsel v2, vs0, vs1, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>  ; Vector not operation
@@ -335,9 +379,12 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnor vs0, v4, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>  ; Vector not operation
@@ -352,9 +399,11 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v3
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -369,10 +418,13 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnor vs0, v3, v3
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
@@ -387,9 +439,12 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxlnor vs1, v3, v3
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT:    xxsel v2, vs0, vs1, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>  ; Vector not operation
@@ -404,9 +459,12 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnor vs0, v3, v3
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>  ; Vector not operation
@@ -421,9 +479,11 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -439,10 +499,13 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -458,9 +521,12 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnand vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -476,9 +542,12 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs1, vs1, vs1
+; CHECK-NEXT:    xxlnand vs0, v3, v4
+; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 0fc296cc5a4e2..5031ebc930e11 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -267,9 +267,11 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
+; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
+; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -284,10 +286,13 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -302,9 +307,12 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxlxor vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -319,9 +327,12 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
+; CHECK-NEXT:    xxleqv vs0, vs0, vs0
+; CHECK-NEXT:    xxlxor vs1, v3, v4
+; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
>From 52185882fc9f24090c54db0649e4121320592de1 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 07:33:47 +0000
Subject: [PATCH 16/20] [X86]: Removed reversing of rewriting demorgan
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e870514db2443 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55615,10 +55615,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
 
   // Folds for better commutativity:
   if (N1->hasOneUse()) {
+    /*
     // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
     if (SDValue Not = IsNOT(N1, DAG))
       return DAG.getNOT(
           DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
+          */
 
     // ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x))
     // Zero out elements by setting the PSHUFB mask value to 0xFF.
>From de10f4a6eeae333b8e0972ff499f09015b72c203 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 07:34:18 +0000
Subject: [PATCH 17/20] [X86][PowerPC][AArch64]: Updated tests
---
 llvm/test/CodeGen/AArch64/bsl.ll              | 120 ++++----
 .../CodeGen/AArch64/build-vector-dup-simd.ll  |  24 +-
 .../CodeGen/AArch64/fp16-v4-instructions.ll   |  44 +--
 .../CodeGen/AArch64/fp16-v8-instructions.ll   |  50 +---
 llvm/test/CodeGen/AArch64/sve2-bsl.ll         |  36 +--
 .../CodeGen/PowerPC/fp-strict-fcmp-spe.ll     |  24 +-
 .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll    |   3 +-
 .../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll  |  10 +-
 .../CodeGen/PowerPC/xxeval-vselect-x-and.ll   |  19 +-
 .../CodeGen/PowerPC/xxeval-vselect-x-b.ll     |  15 +-
 .../CodeGen/PowerPC/xxeval-vselect-x-c.ll     |  15 +-
 .../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll   |  19 +-
 .../CodeGen/PowerPC/xxeval-vselect-x-nor.ll   | 125 ++------
 .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll   |  19 +-
 llvm/test/CodeGen/X86/abds-vector-128.ll      |   6 +-
 .../test/CodeGen/X86/avx512-mask-bit-manip.ll |  25 +-
 llvm/test/CodeGen/X86/combine-or.ll           |  39 +--
 llvm/test/CodeGen/X86/combine-srl.ll          |   9 +-
 .../CodeGen/X86/expand-vp-int-intrinsics.ll   |   9 +-
 llvm/test/CodeGen/X86/ispow2.ll               |  24 +-
 llvm/test/CodeGen/X86/machine-cp.ll           |  67 +++--
 llvm/test/CodeGen/X86/promote-cmp.ll          |  34 +--
 llvm/test/CodeGen/X86/setcc-combine.ll        |   6 +-
 .../X86/urem-seteq-vec-tautological.ll        |  12 +-
 llvm/test/CodeGen/X86/vec_cmp_sint-128.ll     |  24 +-
 llvm/test/CodeGen/X86/vec_cmp_uint-128.ll     |  24 +-
 llvm/test/CodeGen/X86/vec_compare.ll          |  24 +-
 llvm/test/CodeGen/X86/vec_ctbits.ll           |  18 +-
 llvm/test/CodeGen/X86/vec_setcc-2.ll          |  13 +-
 llvm/test/CodeGen/X86/vector-lzcnt-128.ll     | 248 ++++++++--------
 llvm/test/CodeGen/X86/vector-lzcnt-512.ll     | 276 +++++++++---------
 llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll  |   9 +-
 llvm/test/CodeGen/X86/vector-popcnt-128.ll    |  10 +-
 llvm/test/CodeGen/X86/vector-unsigned-cmp.ll  |  24 +-
 34 files changed, 627 insertions(+), 797 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
index fcf7393d2c801..df6b6f75b8935 100644
--- a/llvm/test/CodeGen/AArch64/bsl.ll
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -32,19 +32,17 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 ; NEON-LABEL: nbsl_v1i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
-; NEON-NEXT:    bic v1.8b, v1.8b, v2.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; NEON-NEXT:    mvn v0.8b, v0.8b
-; NEON-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v1i64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    bic v1.8b, v1.8b, v2.8b
-; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT:    bic v0.8b, v0.8b, v1.8b
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <1 x i64> %2, %0
   %5 = xor <1 x i64> %2, splat (i64 -1)
@@ -80,8 +78,9 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
 ; NEON-LABEL: bsl2n_v1i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    mvn v1.8b, v1.8b
-; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
+; NEON-NEXT:    orr v1.8b, v2.8b, v1.8b
+; NEON-NEXT:    orn v0.8b, v0.8b, v1.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: bsl2n_v1i64:
@@ -119,19 +118,17 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NEON-LABEL: nbsl_v2i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
-; NEON-NEXT:    bic v1.16b, v1.16b, v2.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; NEON-NEXT:    mvn v0.16b, v0.16b
-; NEON-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v2i64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    bic v1.16b, v1.16b, v2.16b
-; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT:    bic v0.16b, v0.16b, v1.16b
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <2 x i64> %2, %0
   %5 = xor <2 x i64> %2, splat (i64 -1)
@@ -167,8 +164,9 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; NEON-LABEL: bsl2n_v2i64:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    mvn v1.16b, v1.16b
-; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
+; NEON-NEXT:    orr v1.16b, v2.16b, v1.16b
+; NEON-NEXT:    orn v0.16b, v0.16b, v1.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: bsl2n_v2i64:
@@ -191,18 +189,17 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
 ; NEON-LABEL: nbsl_v8i8:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v3.8b, v2.8b, v1.8b
-; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
-; NEON-NEXT:    orn v1.8b, v3.8b, v1.8b
-; NEON-NEXT:    bic v0.8b, v1.8b, v0.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    and v3.8b, v2.8b, v1.8b
-; SVE2-NEXT:    and v0.8b, v2.8b, v0.8b
-; SVE2-NEXT:    orn v1.8b, v3.8b, v1.8b
-; SVE2-NEXT:    bic v0.8b, v1.8b, v0.8b
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <8 x i8> %2, %0
   %5 = xor <8 x i8> %2, splat (i8 -1)
@@ -215,18 +212,17 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
 define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
 ; NEON-LABEL: nbsl_v4i16:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v3.8b, v2.8b, v1.8b
-; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
-; NEON-NEXT:    orn v1.8b, v3.8b, v1.8b
-; NEON-NEXT:    bic v0.8b, v1.8b, v0.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT:    mvn v0.8b, v0.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    and v3.8b, v2.8b, v1.8b
-; SVE2-NEXT:    and v0.8b, v2.8b, v0.8b
-; SVE2-NEXT:    orn v1.8b, v3.8b, v1.8b
-; SVE2-NEXT:    bic v0.8b, v1.8b, v0.8b
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <4 x i16> %2, %0
   %5 = xor <4 x i16> %2, splat (i16 -1)
@@ -239,19 +235,17 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
 define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 ; NEON-LABEL: nbsl_v2i32:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.8b, v2.8b, v0.8b
-; NEON-NEXT:    bic v1.8b, v1.8b, v2.8b
+; NEON-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; NEON-NEXT:    mvn v0.8b, v0.8b
-; NEON-NEXT:    bic v0.8b, v0.8b, v1.8b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v2i32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT:    bic v1.8b, v1.8b, v2.8b
-; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT:    bic v0.8b, v0.8b, v1.8b
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <2 x i32> %2, %0
   %5 = xor <2 x i32> %2, splat (i32 -1)
@@ -264,18 +258,17 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; NEON-LABEL: nbsl_v16i8:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v3.16b, v2.16b, v1.16b
-; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
-; NEON-NEXT:    orn v1.16b, v3.16b, v1.16b
-; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    and v3.16b, v2.16b, v1.16b
-; SVE2-NEXT:    and v0.16b, v2.16b, v0.16b
-; SVE2-NEXT:    orn v1.16b, v3.16b, v1.16b
-; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <16 x i8> %2, %0
   %5 = xor <16 x i8> %2, splat (i8 -1)
@@ -288,18 +281,17 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 ; NEON-LABEL: nbsl_v8i16:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v3.16b, v2.16b, v1.16b
-; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
-; NEON-NEXT:    orn v1.16b, v3.16b, v1.16b
-; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    and v3.16b, v2.16b, v1.16b
-; SVE2-NEXT:    and v0.16b, v2.16b, v0.16b
-; SVE2-NEXT:    orn v1.16b, v3.16b, v1.16b
-; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <8 x i16> %2, %0
   %5 = xor <8 x i16> %2, splat (i16 -1)
@@ -312,19 +304,17 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
 define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
 ; NEON-LABEL: nbsl_v4i32:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    and v0.16b, v2.16b, v0.16b
-; NEON-NEXT:    bic v1.16b, v1.16b, v2.16b
+; NEON-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; NEON-NEXT:    mvn v0.16b, v0.16b
-; NEON-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nbsl_v4i32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT:    bic v1.16b, v1.16b, v2.16b
-; SVE2-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT:    bic v0.16b, v0.16b, v1.16b
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %4 = and <4 x i32> %2, %0
   %5 = xor <4 x i32> %2, splat (i32 -1)
@@ -481,14 +471,16 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
 define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
 ; NEON-LABEL: nor_q:
 ; NEON:       // %bb.0:
-; NEON-NEXT:    mvn v1.16b, v1.16b
-; NEON-NEXT:    bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT:    orr v0.16b, v1.16b, v0.16b
+; NEON-NEXT:    mvn v0.16b, v0.16b
 ; NEON-NEXT:    ret
 ;
 ; SVE2-LABEL: nor_q:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    mvn v1.16b, v1.16b
-; SVE2-NEXT:    bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
   %3 = or <2 x i64> %1, %0
   %4 = xor <2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
index af7f9b6d471ad..ac0b8e89519dd 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
@@ -117,10 +117,10 @@ entry:
 define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
 ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
 ; CHECK-NOFULLFP16:       // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT:    fcmgt s2, s1, s0
-; CHECK-NOFULLFP16-NEXT:    fcmgt s0, s0, s1
-; CHECK-NOFULLFP16-NEXT:    mvn v1.8b, v2.8b
-; CHECK-NOFULLFP16-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-NOFULLFP16-NEXT:    fcmgt s2, s0, s1
+; CHECK-NOFULLFP16-NEXT:    fcmgt s0, s1, s0
+; CHECK-NOFULLFP16-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NOFULLFP16-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-NOFULLFP16-NEXT:    ret
 ;
 ; CHECK-NONANS-LABEL: dup_v1i32_ueq:
@@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
 ;
 ; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
 ; CHECK-FULLFP16:       // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT:    fcmgt s2, s1, s0
-; CHECK-FULLFP16-NEXT:    fcmgt s0, s0, s1
-; CHECK-FULLFP16-NEXT:    mvn v1.8b, v2.8b
-; CHECK-FULLFP16-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-FULLFP16-NEXT:    fcmgt s2, s0, s1
+; CHECK-FULLFP16-NEXT:    fcmgt s0, s1, s0
+; CHECK-FULLFP16-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-FULLFP16-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-FULLFP16-NEXT:    ret
 entry:
   %0 = fcmp ueq float %a, %b
@@ -260,10 +260,10 @@ entry:
 define <1 x float> @dup_v1i32_uno(float %a, float %b) {
 ; CHECK-LABEL: dup_v1i32_uno:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcmgt s2, s1, s0
-; CHECK-NEXT:    fcmge s0, s0, s1
-; CHECK-NEXT:    mvn v1.8b, v2.8b
-; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    fcmge s2, s0, s1
+; CHECK-NEXT:    fcmgt s0, s1, s0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-NEXT:    ret
 entry:
   %0 = fcmp uno float %a, %b
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 529b76cf84906..6233ce743b706 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    fcmgt v2.4h, v1.4h, v0.4h
-; CHECK-FP16-SD-NEXT:    fcmgt v0.4h, v0.4h, v1.4h
-; CHECK-FP16-SD-NEXT:    mvn v1.8b, v2.8b
-; CHECK-FP16-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-FP16-SD-NEXT:    ret
+; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    fcmgt v2.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-NEXT:    mvn v0.8b, v0.8b
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -581,14 +581,6 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-CVT-GI-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    fcmgt v2.4h, v0.4h, v1.4h
-; CHECK-FP16-GI-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-FP16-GI-NEXT:    ret
 
   %1 = fcmp ueq <4 x half> %a, %b
   ret <4 x i1> %1
@@ -722,13 +714,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: test_fcmp_uno:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    fcmgt v2.4h, v1.4h, v0.4h
-; CHECK-FP16-SD-NEXT:    fcmge v0.4h, v0.4h, v1.4h
-; CHECK-FP16-SD-NEXT:    mvn v1.8b, v2.8b
-; CHECK-FP16-SD-NEXT:    bic v0.8b, v1.8b, v0.8b
-; CHECK-FP16-SD-NEXT:    ret
+; CHECK-FP16-LABEL: test_fcmp_uno:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    fcmge v2.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-NEXT:    mvn v0.8b, v0.8b
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_uno:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -740,14 +732,6 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-CVT-GI-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_uno:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    fcmge v2.4h, v0.4h, v1.4h
-; CHECK-FP16-GI-NEXT:    fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-GI-NEXT:    orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-GI-NEXT:    mvn v0.8b, v0.8b
-; CHECK-FP16-GI-NEXT:    ret
 
   %1 = fcmp uno <4 x half> %a, %b
   ret <4 x i1> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 6d67fc9ebe1c6..86763eb5f9e3b 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    fcmgt v2.8h, v1.8h, v0.8h
-; CHECK-FP16-SD-NEXT:    fcmgt v0.8h, v0.8h, v1.8h
-; CHECK-FP16-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-FP16-SD-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-SD-NEXT:    ret
+; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    fcmgt v2.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT:    mvn v0.16b, v0.16b
+; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -1016,15 +1016,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-CVT-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    fcmgt v2.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-FP16-GI-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-GI-NEXT:    ret
   %1 = fcmp ueq <8 x half> %a, %b
   ret <8 x i1> %1
 }
@@ -1199,14 +1190,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-SD-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: test_fcmp_uno:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    fcmgt v2.8h, v1.8h, v0.8h
-; CHECK-FP16-SD-NEXT:    fcmge v0.8h, v0.8h, v1.8h
-; CHECK-FP16-SD-NEXT:    mvn v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
-; CHECK-FP16-SD-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-SD-NEXT:    ret
+; CHECK-FP16-LABEL: test_fcmp_uno:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    fcmge v2.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT:    mvn v0.16b, v0.16b
+; CHECK-FP16-NEXT:    xtn v0.8b, v0.8h
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_fcmp_uno:
 ; CHECK-CVT-GI:       // %bb.0:
@@ -1225,15 +1216,6 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-CVT-GI-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-CVT-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_uno:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    fcmge v2.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT:    fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT:    mvn v0.16b, v0.16b
-; CHECK-FP16-GI-NEXT:    xtn v0.8b, v0.8h
-; CHECK-FP16-GI-NEXT:    ret
   %1 = fcmp uno <8 x half> %a, %b
   ret <8 x i1> %1
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 80293388a5cf9..6cfe66eb8e633 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -46,9 +46,7 @@ define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-LABEL: nbsl_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, #127 // =0x7f
-; CHECK-NEXT:    and z1.b, z1.b, #0x80
-; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT:    bic z0.d, z2.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 16 x i8> %a, splat(i8 127)
   %2 = and <vscale x 16 x i8> %b, splat(i8 -128)
@@ -61,9 +59,7 @@ define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
 ; CHECK-LABEL: nbsl_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT:    and z1.h, z1.h, #0x8000
-; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT:    bic z0.d, z2.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 8 x i16> %a, splat(i16 32767)
   %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
@@ -76,9 +72,7 @@ define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
 ; CHECK-LABEL: nbsl_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.s, #0x7fffffff
-; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
-; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT:    bic z0.d, z2.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
   %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
@@ -91,9 +85,7 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 ; CHECK-LABEL: nbsl_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
-; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT:    bic z0.d, z2.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
   %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
@@ -123,9 +115,7 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
 define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
 ; CHECK-LABEL: codegen_nbsl_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 16 x i8> %2, %0
   %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -175,9 +165,7 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
 define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
 ; CHECK-LABEL: codegen_nbsl_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 8 x i16> %2, %0
   %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -227,9 +215,7 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
 define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
 ; CHECK-LABEL: codegen_nbsl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 4 x i32> %2, %0
   %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -279,9 +265,7 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
 define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
 ; CHECK-LABEL: codegen_nbsl_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT:    bic z0.d, z0.d, z1.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %4 = and <vscale x 2 x i64> %2, %0
   %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -357,9 +341,7 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
 define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
 ; CHECK-LABEL: nor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    eor z1.d, z1.d, z2.d
-; CHECK-NEXT:    bic z0.d, z1.d, z0.d
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z0.d
 ; CHECK-NEXT:    ret
   %3 = or <vscale x 2 x i64> %1, %0
   %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
index 78644691fb646..c20d319f2ac79 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
@@ -113,12 +113,14 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
 define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
 ; SPE-LABEL: test_f32_ueq_s:
 ; SPE:       # %bb.0:
-; SPE-NEXT:    efscmpgt cr0, r5, r6
-; SPE-NEXT:    bc 12, gt, .LBB7_2
-; SPE-NEXT:  # %bb.1:
 ; SPE-NEXT:    efscmplt cr0, r5, r6
-; SPE-NEXT:    bclr 4, gt, 0
-; SPE-NEXT:  .LBB7_2:
+; SPE-NEXT:    bc 12, gt, .LBB7_3
+; SPE-NEXT:  # %bb.1:
+; SPE-NEXT:    efscmpgt cr0, r5, r6
+; SPE-NEXT:    bc 12, gt, .LBB7_3
+; SPE-NEXT:  # %bb.2:
+; SPE-NEXT:    mr r4, r3
+; SPE-NEXT:  .LBB7_3:
 ; SPE-NEXT:    mr r3, r4
 ; SPE-NEXT:    blr
   %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
@@ -353,12 +355,14 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    evmergelo r7, r7, r8
 ; SPE-NEXT:    evmergelo r5, r5, r6
-; SPE-NEXT:    efdcmpgt cr0, r5, r7
-; SPE-NEXT:    bc 12, gt, .LBB21_2
-; SPE-NEXT:  # %bb.1:
 ; SPE-NEXT:    efdcmplt cr0, r5, r7
-; SPE-NEXT:    bclr 4, gt, 0
-; SPE-NEXT:  .LBB21_2:
+; SPE-NEXT:    bc 12, gt, .LBB21_3
+; SPE-NEXT:  # %bb.1:
+; SPE-NEXT:    efdcmpgt cr0, r5, r7
+; SPE-NEXT:    bc 12, gt, .LBB21_3
+; SPE-NEXT:  # %bb.2:
+; SPE-NEXT:    mr r4, r3
+; SPE-NEXT:  .LBB21_3:
 ; SPE-NEXT:    mr r3, r4
 ; SPE-NEXT:    blr
   %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index 872a08c20eae8..310f0a66aa9b9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -28,7 +28,8 @@ define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
 define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: test_vorc:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vor 2, 3, 2
+; CHECK-NEXT:    vorc 3, 2, 3
+; CHECK-NEXT:    vorc 2, 2, 3
 ; CHECK-NEXT:    blr
        %tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
        %tmp2 = or <4 x i32> %x, %tmp1
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
index ba5c9edb3897d..e391228fc95a9 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
@@ -51,8 +51,7 @@ entry:
 define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: norA_andB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v2, v2
-; CHECK-NEXT:    xxeval v2, vs0, v3, v4, 14
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 224
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -104,8 +103,7 @@ entry:
 define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: norA_xorB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v2, v2
-; CHECK-NEXT:    xxeval v2, vs0, v3, v4, 9
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 144
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -171,9 +169,7 @@ entry:
 define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
 ; CHECK-LABEL: orA_norB_C:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxlnor vs1, v3, v3
-; CHECK-NEXT:    xxeval v2, v2, vs1, vs0, 31
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 143
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index f98edc21bf2ea..b41220b01373a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -80,11 +80,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -99,13 +97,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -120,12 +115,9 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -140,12 +132,9 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index 0baa420b79761..a51e392279d55 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_nor_BC_B_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 6fc822d729457..54bf6c03f8c1a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_nor_BC_C_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
index 78ae36cc0ecf7..ba7680b27cc17 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
@@ -84,11 +84,9 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -104,13 +102,10 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxleqv vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -126,12 +121,9 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxleqv vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -147,12 +139,9 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxleqv vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 152
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
index 90928e668afd8..369587454a7c1 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -34,13 +32,10 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -55,12 +50,9 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxland vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -75,12 +67,9 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxland vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 129
 ; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %C
@@ -96,9 +85,8 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -112,12 +100,10 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_B_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -131,11 +117,9 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_B_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -149,11 +133,9 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_B_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 131
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
@@ -168,9 +150,8 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -184,12 +165,10 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_C_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -203,11 +182,9 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
 ; CHECK-LABEL: ternary_A_C_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -221,11 +198,9 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
 ; CHECK-LABEL: ternary_A_C_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 133
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
@@ -239,11 +214,9 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -258,13 +231,10 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -279,12 +249,9 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlxor vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <16 x i8> %B, %C
@@ -299,12 +266,9 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlxor vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 134
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <8 x i16> %B, %C
@@ -319,11 +283,9 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -338,13 +300,10 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -359,12 +318,9 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxlnor vs1, v4, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, vs1, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>  ; Vector not operation
@@ -379,12 +335,9 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 138
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>  ; Vector not operation
@@ -399,11 +352,9 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -418,13 +369,10 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnor vs0, v3, v3
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
@@ -439,12 +387,9 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxlnor vs1, v3, v3
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, vs1, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>  ; Vector not operation
@@ -459,12 +404,9 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 140
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>  ; Vector not operation
@@ -479,11 +421,9 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs1, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -499,13 +439,10 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -521,12 +458,9 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnand vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
 ; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %C
@@ -542,12 +476,9 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x
 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs1, vs1, vs1
-; CHECK-NEXT:    xxlnand vs0, v3, v4
-; CHECK-NEXT:    xxeval vs1, v3, v4, vs1, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 142
 ; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 5031ebc930e11..0fc296cc5a4e2 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -267,11 +267,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    xxeval vs0, v3, v4, v5, 96
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -286,13 +284,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -307,12 +302,9 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltib v5, 7
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxlxor vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslb v2, v2, v5
 ; CHECK-NEXT:    vsrab v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <16 x i8> %B, %C
@@ -327,12 +319,9 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxspltiw v5, 983055
-; CHECK-NEXT:    xxleqv vs0, vs0, vs0
-; CHECK-NEXT:    xxlxor vs1, v3, v4
-; CHECK-NEXT:    xxeval vs0, v3, v4, vs0, 96
 ; CHECK-NEXT:    vslh v2, v2, v5
 ; CHECK-NEXT:    vsrah v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index bc57a31f063b5..148be83892b72 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
 ; SSE2-NEXT:    pand %xmm6, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    paddq %xmm4, %xmm0
 ; SSE2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
index 37df42ea2682d..3fcfb9d278da7 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
@@ -714,19 +714,18 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
 ; AVX512F-NEXT:    vpmovmskb %ymm4, %ecx
 ; AVX512F-NEXT:    shlq $32, %rcx
 ; AVX512F-NEXT:    leaq (%rax,%rcx), %rdx
-; AVX512F-NEXT:    addq $-1, %rdx
-; AVX512F-NEXT:    notq %rcx
-; AVX512F-NEXT:    andnq %rcx, %rax, %rax
-; AVX512F-NEXT:    andq %rax, %rdx
-; AVX512F-NEXT:    movq %rdx, %rax
-; AVX512F-NEXT:    movl %edx, %ecx
-; AVX512F-NEXT:    kmovw %edx, %k1
-; AVX512F-NEXT:    shrq $32, %rdx
-; AVX512F-NEXT:    shrq $48, %rax
-; AVX512F-NEXT:    shrl $16, %ecx
-; AVX512F-NEXT:    kmovw %ecx, %k2
-; AVX512F-NEXT:    kmovw %eax, %k3
-; AVX512F-NEXT:    kmovw %edx, %k4
+; AVX512F-NEXT:    addq %rcx, %rax
+; AVX512F-NEXT:    addq $-1, %rax
+; AVX512F-NEXT:    andnq %rax, %rdx, %rax
+; AVX512F-NEXT:    movq %rax, %rcx
+; AVX512F-NEXT:    movl %eax, %edx
+; AVX512F-NEXT:    kmovw %eax, %k1
+; AVX512F-NEXT:    shrq $32, %rax
+; AVX512F-NEXT:    shrq $48, %rcx
+; AVX512F-NEXT:    shrl $16, %edx
+; AVX512F-NEXT:    kmovw %edx, %k2
+; AVX512F-NEXT:    kmovw %ecx, %k3
+; AVX512F-NEXT:    kmovw %eax, %k4
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8d5bbb4ae8e1e..8c91274abf3dd 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -183,32 +183,14 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
 }
 
 define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
-; SSE-LABEL: or_build_pair_not:
-; SSE:       # %bb.0:
-; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
-; SSE-NEXT:    shlq $32, %rsi
-; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    orq %rsi, %rax
-; SSE-NEXT:    notq %rax
-; SSE-NEXT:    retq
-;
-; AVX1-LABEL: or_build_pair_not:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX1-NEXT:    shlq $32, %rsi
-; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    orq %rsi, %rax
-; AVX1-NEXT:    notq %rax
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: or_build_pair_not:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT:    shlq $32, %rsi
-; AVX2-NEXT:    notq %rsi
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    andnq %rsi, %rax, %rax
-; AVX2-NEXT:    retq
+; CHECK-LABEL: or_build_pair_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    shlq $32, %rsi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    notq %rax
+; CHECK-NEXT:    retq
   %n0 = xor i32 %a0, -1
   %n1 = xor i32 %a1, -1
   %x0 = zext i32 %n0 to i64
@@ -280,9 +262,10 @@ define i64 @PR89533(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    shlq $32, %rcx
+; AVX2-NEXT:    orq %rax, %rcx
 ; AVX2-NEXT:    notq %rcx
-; AVX2-NEXT:    andnq %rcx, %rax, %rax
-; AVX2-NEXT:    tzcntq %rax, %rax
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    tzcntq %rcx, %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
   %cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 4e31177023b08..21657bf67f233 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -437,12 +437,13 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $8, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index 7919495821efd..905d1648564fb 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -1487,12 +1487,13 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; SSE-NEXT:    por %xmm1, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrld $8, %xmm1
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrld $16, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    psrld $16, %xmm0
+; SSE-NEXT:    pandn %xmm3, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrlw $1, %xmm1
 ; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 478d80e9827a5..badfd1af940ca 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -179,23 +179,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
 ; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm6
 ; CHECK-NOBMI-NEXT:    pcmpgtd %xmm4, %xmm6
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-NOBMI-NEXT:    pand %xmm6, %xmm1
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
-; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm4
-; CHECK-NOBMI-NEXT:    pandn %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT:    pxor %xmm5, %xmm3
 ; CHECK-NOBMI-NEXT:    pxor %xmm3, %xmm0
-; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
-; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm4
+; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm5
+; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm5
+; CHECK-NOBMI-NEXT:    movdqa %xmm5, %xmm7
+; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT:    pcmpeqd %xmm3, %xmm0
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NOBMI-NEXT:    pand %xmm4, %xmm0
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
-; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm3
-; CHECK-NOBMI-NEXT:    pandn %xmm3, %xmm0
-; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; CHECK-NOBMI-NEXT:    andps %xmm7, %xmm0
+; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
+; CHECK-NOBMI-NEXT:    orps %xmm5, %xmm0
+; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll
index 0713f0bbe244c..c84a1159ad56a 100644
--- a/llvm/test/CodeGen/X86/machine-cp.ll
+++ b/llvm/test/CodeGen/X86/machine-cp.ll
@@ -100,38 +100,55 @@ define <16 x float> @foo(<16 x float> %x) {
 ; CHECK-LABEL: foo:
 ; CHECK:       ## %bb.0: ## %bb
 ; CHECK-NEXT:    xorps %xmm5, %xmm5
-; CHECK-NEXT:    cvttps2dq %xmm3, %xmm6
+; CHECK-NEXT:    cvttps2dq %xmm3, %xmm8
 ; CHECK-NEXT:    movaps %xmm3, %xmm4
 ; CHECK-NEXT:    cmpltps %xmm5, %xmm4
-; CHECK-NEXT:    cvttps2dq %xmm2, %xmm3
+; CHECK-NEXT:    movaps {{.*#+}} xmm7 = [13,14,15,16]
+; CHECK-NEXT:    movaps %xmm4, %xmm6
+; CHECK-NEXT:    orps %xmm7, %xmm6
+; CHECK-NEXT:    cvtdq2ps %xmm8, %xmm3
+; CHECK-NEXT:    andps %xmm7, %xmm3
+; CHECK-NEXT:    andps %xmm6, %xmm3
+; CHECK-NEXT:    andnps %xmm4, %xmm6
+; CHECK-NEXT:    cvttps2dq %xmm2, %xmm4
 ; CHECK-NEXT:    movaps %xmm2, %xmm7
 ; CHECK-NEXT:    cmpltps %xmm5, %xmm7
-; CHECK-NEXT:    cvttps2dq %xmm1, %xmm2
+; CHECK-NEXT:    movaps {{.*#+}} xmm8 = [9,10,11,12]
+; CHECK-NEXT:    movaps %xmm7, %xmm9
+; CHECK-NEXT:    orps %xmm8, %xmm9
+; CHECK-NEXT:    cvtdq2ps %xmm4, %xmm2
+; CHECK-NEXT:    andps %xmm8, %xmm2
+; CHECK-NEXT:    andps %xmm9, %xmm2
+; CHECK-NEXT:    andnps %xmm7, %xmm9
+; CHECK-NEXT:    cvttps2dq %xmm1, %xmm4
+; CHECK-NEXT:    cmpltps %xmm5, %xmm1
+; CHECK-NEXT:    movaps {{.*#+}} xmm7 = [5,6,7,8]
 ; CHECK-NEXT:    movaps %xmm1, %xmm8
-; CHECK-NEXT:    cmpltps %xmm5, %xmm8
+; CHECK-NEXT:    orps %xmm7, %xmm8
+; CHECK-NEXT:    cvtdq2ps %xmm4, %xmm4
+; CHECK-NEXT:    andps %xmm7, %xmm4
+; CHECK-NEXT:    andps %xmm8, %xmm4
+; CHECK-NEXT:    andnps %xmm1, %xmm8
 ; CHECK-NEXT:    cvttps2dq %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm0, %xmm9
-; CHECK-NEXT:    cmpltps %xmm5, %xmm9
+; CHECK-NEXT:    cmpltps %xmm5, %xmm0
 ; CHECK-NEXT:    movaps {{.*#+}} xmm5 = [1,2,3,4]
-; CHECK-NEXT:    orps %xmm5, %xmm9
-; CHECK-NEXT:    movaps {{.*#+}} xmm10 = [5,6,7,8]
-; CHECK-NEXT:    orps %xmm10, %xmm8
-; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
-; CHECK-NEXT:    orps %xmm11, %xmm7
-; CHECK-NEXT:    movaps {{.*#+}} xmm12 = [13,14,15,16]
-; CHECK-NEXT:    orps %xmm12, %xmm4
-; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm0
-; CHECK-NEXT:    cvtdq2ps %xmm2, %xmm1
-; CHECK-NEXT:    cvtdq2ps %xmm3, %xmm2
-; CHECK-NEXT:    cvtdq2ps %xmm6, %xmm3
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    andps %xmm9, %xmm0
-; CHECK-NEXT:    andps %xmm10, %xmm1
-; CHECK-NEXT:    andps %xmm8, %xmm1
-; CHECK-NEXT:    andps %xmm11, %xmm2
-; CHECK-NEXT:    andps %xmm7, %xmm2
-; CHECK-NEXT:    andps %xmm12, %xmm3
-; CHECK-NEXT:    andps %xmm4, %xmm3
+; CHECK-NEXT:    movaps %xmm0, %xmm7
+; CHECK-NEXT:    orps %xmm5, %xmm7
+; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
+; CHECK-NEXT:    andps %xmm5, %xmm1
+; CHECK-NEXT:    andps %xmm7, %xmm1
+; CHECK-NEXT:    andnps %xmm0, %xmm7
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-NEXT:    andps %xmm0, %xmm7
+; CHECK-NEXT:    orps %xmm7, %xmm1
+; CHECK-NEXT:    andps %xmm0, %xmm8
+; CHECK-NEXT:    orps %xmm8, %xmm4
+; CHECK-NEXT:    andps %xmm0, %xmm9
+; CHECK-NEXT:    orps %xmm9, %xmm2
+; CHECK-NEXT:    andps %xmm0, %xmm6
+; CHECK-NEXT:    orps %xmm6, %xmm3
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    movaps %xmm4, %xmm1
 ; CHECK-NEXT:    retq
 bb:
   %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index aeb8fe93930a0..88934a382bbfa 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -8,36 +8,34 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; SSE2-LABEL: PR45808:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm3, %xmm6
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    pxor %xmm4, %xmm5
+; SSE2-NEXT:    movdqa %xmm1, %xmm6
 ; SSE2-NEXT:    pxor %xmm4, %xmm6
-; SSE2-NEXT:    movdqa %xmm1, %xmm7
-; SSE2-NEXT:    pxor %xmm4, %xmm7
-; SSE2-NEXT:    movdqa %xmm7, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT:    movdqa %xmm6, %xmm7
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
 ; SSE2-NEXT:    movdqa %xmm2, %xmm8
 ; SSE2-NEXT:    pxor %xmm4, %xmm8
 ; SSE2-NEXT:    pxor %xmm0, %xmm4
 ; SSE2-NEXT:    movdqa %xmm4, %xmm9
 ; SSE2-NEXT:    pcmpgtd %xmm8, %xmm9
 ; SSE2-NEXT:    movdqa %xmm9, %xmm10
-; SSE2-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2]
-; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2]
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
 ; SSE2-NEXT:    pcmpeqd %xmm8, %xmm4
-; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
 ; SSE2-NEXT:    andps %xmm10, %xmm4
-; SSE2-NEXT:    shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3]
-; SSE2-NEXT:    pcmpeqd %xmm6, %xmm6
-; SSE2-NEXT:    pxor %xmm9, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3]
-; SSE2-NEXT:    pandn %xmm6, %xmm4
-; SSE2-NEXT:    pxor %xmm6, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3]
+; SSE2-NEXT:    orps %xmm4, %xmm9
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT:    pxor %xmm9, %xmm4
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
 ; SSE2-NEXT:    pand %xmm4, %xmm0
 ; SSE2-NEXT:    pandn %xmm2, %xmm4
 ; SSE2-NEXT:    por %xmm4, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
-; SSE2-NEXT:    por %xmm7, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
 ; SSE2-NEXT:    pslld $31, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pand %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index d97e603c636af..f526db00df606 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %na = xor <2 x i64> %a, <i64 -1, i64 -1>
   %nb = xor <2 x i64> %b, <i64 -1, i64 -1>
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 6e68b37bec98a..84856aab85079 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE2-NEXT:    pand %xmm2, %xmm1
 ; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm2
-; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
 ; CHECK-SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE2-NEXT:    retq
 ;
@@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE41-NEXT:    pand %xmm2, %xmm1
 ; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-SSE41-NEXT:    pxor %xmm0, %xmm2
-; CHECK-SSE41-NEXT:    pandn %xmm2, %xmm1
+; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-SSE41-NEXT:    pxor %xmm0, %xmm1
 ; CHECK-SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE41-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
index 25ba593d47062..63e08de7fdf53 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: le_v2i64:
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index bd730e7dbefbc..9d65ff94061b0 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; SSE42-LABEL: le_v2i64:
diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index 0fc298a2b4cd4..c1045c7b72f2c 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp sge <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp sle <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp uge <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
@@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand %xmm3, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    retl
 	%C = icmp ule <2 x i64> %A, %B
 	%D = sext <2 x i1> %C to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 048117dd43e66..4a3bcbb0a96a4 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -49,12 +49,13 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlq $16, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrlq $32, %xmm1
 ; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
 ; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm3
+; CHECK-NEXT:    pandn %xmm2, %xmm3
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    psrlq $32, %xmm0
+; CHECK-NEXT:    pandn %xmm3, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -151,12 +152,13 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrld $8, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrld $16, %xmm1
 ; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
 ; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm3
+; CHECK-NEXT:    pandn %xmm2, %xmm3
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    psrld $16, %xmm0
+; CHECK-NEXT:    pandn %xmm3, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index ade6b5c8d6bdf..5a71878ea4579 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -448,14 +448,13 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
 ; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT:    pxor %xmm2, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
 ; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm3, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT:    por %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ule_v2i64_splat:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index 990113b1ecc1e..716090abf1c4a 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -30,12 +30,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlq $16, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlq $32, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrlq $32, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -70,12 +71,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlq $16, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlq $32, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrlq $32, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -308,12 +310,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlq $16, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlq $32, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrlq $32, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -348,12 +351,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlq $16, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlq $32, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrlq $32, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -583,12 +587,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $8, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -625,12 +630,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrld $8, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrld $16, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrld $16, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -832,12 +838,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrld $8, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -874,12 +881,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrld $8, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrld $16, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrld $16, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1078,12 +1086,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $8, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1114,12 +1123,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $8, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrlw $8, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1286,12 +1296,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $8, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    pandn %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1322,12 +1333,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE3-NEXT:    por %xmm1, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $8, %xmm1
 ; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE3-NEXT:    pxor %xmm1, %xmm2
-; SSE3-NEXT:    pandn %xmm2, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
+; SSE3-NEXT:    por %xmm1, %xmm0
+; SSE3-NEXT:    psrlw $8, %xmm0
+; SSE3-NEXT:    pandn %xmm3, %xmm0
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $1, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1493,28 +1505,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
 ; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT:    pxor %xmm1, %xmm3
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    pandn %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    psubb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
+; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT:    psubb %xmm2, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pand %xmm2, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    paddb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm0
 ; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    paddb %xmm3, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $4, %xmm2
+; SSE2-NEXT:    paddb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: testv16i8:
@@ -1526,28 +1539,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $2, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
 ; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE3-NEXT:    pand %xmm2, %xmm1
-; SSE3-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE3-NEXT:    pxor %xmm1, %xmm3
+; SSE3-NEXT:    psrlw $4, %xmm0
+; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE3-NEXT:    pand %xmm1, %xmm0
 ; SSE3-NEXT:    pandn %xmm3, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $1, %xmm1
-; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT:    psubb %xmm1, %xmm0
-; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSE3-NEXT:    psrlw $1, %xmm2
+; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE3-NEXT:    psubb %xmm2, %xmm0
+; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSE3-NEXT:    pand %xmm1, %xmm3
+; SSE3-NEXT:    pand %xmm2, %xmm3
 ; SSE3-NEXT:    psrlw $2, %xmm0
-; SSE3-NEXT:    pand %xmm1, %xmm0
-; SSE3-NEXT:    paddb %xmm3, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    paddb %xmm1, %xmm0
 ; SSE3-NEXT:    pand %xmm2, %xmm0
+; SSE3-NEXT:    paddb %xmm3, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSE3-NEXT:    psrlw $4, %xmm2
+; SSE3-NEXT:    paddb %xmm2, %xmm0
+; SSE3-NEXT:    pand %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: testv16i8:
@@ -1656,28 +1670,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
 ; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm2, %xmm3
 ; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT:    pxor %xmm1, %xmm3
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    pandn %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    psubb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
+; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT:    psubb %xmm2, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pand %xmm2, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    paddb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm0
 ; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    paddb %xmm3, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrlw $4, %xmm2
+; SSE2-NEXT:    paddb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: testv16i8u:
@@ -1689,28 +1704,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
 ; SSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSE3-NEXT:    psrlw $2, %xmm1
 ; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE3-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT:    pxor %xmm1, %xmm2
+; SSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSE3-NEXT:    pandn %xmm2, %xmm3
 ; SSE3-NEXT:    por %xmm1, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE3-NEXT:    pand %xmm2, %xmm1
-; SSE3-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE3-NEXT:    pxor %xmm1, %xmm3
+; SSE3-NEXT:    psrlw $4, %xmm0
+; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE3-NEXT:    pand %xmm1, %xmm0
 ; SSE3-NEXT:    pandn %xmm3, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $1, %xmm1
-; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT:    psubb %xmm1, %xmm0
-; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSE3-NEXT:    psrlw $1, %xmm2
+; SSE3-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE3-NEXT:    psubb %xmm2, %xmm0
+; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSE3-NEXT:    pand %xmm1, %xmm3
+; SSE3-NEXT:    pand %xmm2, %xmm3
 ; SSE3-NEXT:    psrlw $2, %xmm0
-; SSE3-NEXT:    pand %xmm1, %xmm0
-; SSE3-NEXT:    paddb %xmm3, %xmm0
-; SSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSE3-NEXT:    psrlw $4, %xmm1
-; SSE3-NEXT:    paddb %xmm1, %xmm0
 ; SSE3-NEXT:    pand %xmm2, %xmm0
+; SSE3-NEXT:    paddb %xmm3, %xmm0
+; SSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSE3-NEXT:    psrlw $4, %xmm2
+; SSE3-NEXT:    paddb %xmm2, %xmm0
+; SSE3-NEXT:    pand %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: testv16i8u:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index 8c24aa50a626e..8fe00afe0c0bb 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -26,19 +26,18 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrlq $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrlq $32, %zmm2, %zmm3
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -54,32 +53,31 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrlq $8, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm4
+; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT:    vpsrlq $32, %zmm2, %zmm3
+; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm4
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vpor %ymm6, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpandn %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
 ; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpsrlq $32, %ymm2, %ymm6
+; AVX512DQ-NEXT:    vpor %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpandn %ymm1, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
   ret <8 x i64> %out
@@ -107,19 +105,18 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrlq $8, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrlq $32, %zmm2, %zmm3
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -135,32 +132,31 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrlq $8, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm4
+; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT:    vpsrlq $32, %zmm2, %zmm3
+; AVX512DQ-NEXT:    vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
+; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm4
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm5 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
-; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm6
-; AVX512DQ-NEXT:    vpor %ymm6, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpandn %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
 ; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpsrlq $32, %ymm2, %ymm6
+; AVX512DQ-NEXT:    vpor %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpandn %ymm1, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm3, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
   ret <8 x i64> %out
@@ -186,19 +182,18 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrld $16, %zmm2, %zmm3
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
@@ -216,39 +211,38 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT:    vpsrld $16, %zmm2, %zmm3
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm5
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm6 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm6, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpor %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpandn %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
-; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5]
 ; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpor %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpandn %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm2, %ymm2
 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
 ; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
   ret <16 x i32> %out
@@ -274,19 +268,18 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT:    vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vpsrld $16, %zmm2, %zmm3
+; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT:    vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT:    vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
@@ -304,39 +297,38 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT:    vpsrld $16, %zmm2, %zmm3
+; AVX512DQ-NEXT:    vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1)
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512DQ-NEXT:    vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm5
 ; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
 ; AVX512DQ-NEXT:    # ymm6 = mem[0,1,0,1]
 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm6, %ymm3
-; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
-; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
-; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
-; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpor %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpandn %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
-; AVX512DQ-NEXT:    vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
-; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5]
 ; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpackuswb %ymm7, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpor %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpandn %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm2, %ymm2
 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
 ; AVX512DQ-NEXT:    vpsadbw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    retq
   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
   ret <16 x i32> %out
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
index 1473da6aac5ea..555d033ac5ee4 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
@@ -17,12 +17,13 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) {
 ; CHECK-NEXT:    por %xmm1, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrld $8, %xmm1
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrld $16, %xmm1
 ; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
 ; CHECK-NEXT:    pxor %xmm1, %xmm2
-; CHECK-NEXT:    pandn %xmm2, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm3
+; CHECK-NEXT:    pandn %xmm2, %xmm3
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    psrld $16, %xmm0
+; CHECK-NEXT:    pandn %xmm3, %xmm0
 ; CHECK-NEXT:    movdqa %xmm0, %xmm1
 ; CHECK-NEXT:    psrlw $1, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index d8e955c93581e..c1d30b6d5a995 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) {
 ; SSE-NEXT:    pcmpgtd %xmm2, %xmm3
 ; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE-NEXT:    pand %xmm4, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
-; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: ne_1_v2i64:
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index 55f2258aad018..97124f0a9d8d9 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uge_v2i64:
@@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: uge_v2i64:
@@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ule_v2i64:
@@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    pand %xmm3, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pandn %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ule_v2i64:
>From c95c10a91b7c657f514282bd821efb99c5c549d2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 18:48:51 +0000
Subject: [PATCH 18/20] [LoongArch][SystemZ]: Updated tests
---
 .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 141 +++++++++++-------
 llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll   |  20 ++-
 llvm/test/CodeGen/SystemZ/vec-eval.ll         |  49 +++---
 3 files changed, 136 insertions(+), 74 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 27be02c50f1c7..4c5eab036dbb4 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -21,13 +21,15 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
 ; LA32R:       # %bb.0:
 ; LA32R-NEXT:    andi $a1, $a0, 254
 ; LA32R-NEXT:    srli.w $a1, $a1, 1
+; LA32R-NEXT:    nor $a2, $a0, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    andi $a1, $a0, 252
 ; LA32R-NEXT:    srli.w $a1, $a1, 2
+; LA32R-NEXT:    andn $a2, $a2, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    andi $a1, $a0, 240
-; LA32R-NEXT:    srli.w $a1, $a1, 4
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    andi $a0, $a0, 240
+; LA32R-NEXT:    srli.w $a0, $a0, 4
+; LA32R-NEXT:    andn $a0, $a2, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    andi $a1, $a1, 85
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -60,23 +62,28 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
 define i16 @test_ctlz_i16(i16 %a) nounwind {
 ; LA32R-LABEL: test_ctlz_i16:
 ; LA32R:       # %bb.0:
+; LA32R-NEXT:    srli.w $a1, $a0, 1
+; LA32R-NEXT:    lu12i.w $a2, 7
+; LA32R-NEXT:    ori $a2, $a2, 4095
+; LA32R-NEXT:    and $a1, $a1, $a2
+; LA32R-NEXT:    nor $a2, $a0, $zero
+; LA32R-NEXT:    andn $a2, $a2, $a1
+; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    lu12i.w $a1, 15
-; LA32R-NEXT:    ori $a2, $a1, 4094
-; LA32R-NEXT:    and $a2, $a0, $a2
-; LA32R-NEXT:    srli.w $a2, $a2, 1
-; LA32R-NEXT:    or $a0, $a0, $a2
-; LA32R-NEXT:    ori $a2, $a1, 4092
-; LA32R-NEXT:    and $a2, $a0, $a2
-; LA32R-NEXT:    srli.w $a2, $a2, 2
-; LA32R-NEXT:    or $a0, $a0, $a2
-; LA32R-NEXT:    ori $a2, $a1, 4080
-; LA32R-NEXT:    and $a2, $a0, $a2
-; LA32R-NEXT:    srli.w $a2, $a2, 4
-; LA32R-NEXT:    or $a0, $a0, $a2
+; LA32R-NEXT:    ori $a3, $a1, 4092
+; LA32R-NEXT:    and $a3, $a0, $a3
+; LA32R-NEXT:    srli.w $a3, $a3, 2
+; LA32R-NEXT:    andn $a2, $a2, $a3
+; LA32R-NEXT:    or $a0, $a0, $a3
+; LA32R-NEXT:    ori $a3, $a1, 4080
+; LA32R-NEXT:    and $a3, $a0, $a3
+; LA32R-NEXT:    srli.w $a3, $a3, 4
+; LA32R-NEXT:    andn $a2, $a2, $a3
+; LA32R-NEXT:    or $a0, $a0, $a3
 ; LA32R-NEXT:    ori $a1, $a1, 3840
-; LA32R-NEXT:    and $a1, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a1, 8
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 8
+; LA32R-NEXT:    andn $a0, $a2, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 5
 ; LA32R-NEXT:    ori $a2, $a2, 1365
@@ -117,15 +124,19 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; LA32R-LABEL: test_ctlz_i32:
 ; LA32R:       # %bb.0:
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
+; LA32R-NEXT:    nor $a2, $a0, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 2
+; LA32R-NEXT:    andn $a2, $a2, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 4
+; LA32R-NEXT:    andn $a2, $a2, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 8
+; LA32R-NEXT:    andn $a2, $a2, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a2, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 349525
 ; LA32R-NEXT:    ori $a2, $a2, 1365
@@ -175,15 +186,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; LA32R-NEXT:    bne $a1, $zero, .LBB3_2
 ; LA32R-NEXT:  # %bb.1:
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
+; LA32R-NEXT:    nor $a6, $a0, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 2
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 4
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 8
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a6, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    and $a1, $a1, $a5
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -201,15 +216,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; LA32R-NEXT:    ret
 ; LA32R-NEXT:  .LBB3_2:
 ; LA32R-NEXT:    srli.w $a0, $a1, 1
+; LA32R-NEXT:    nor $a6, $a1, $a0
 ; LA32R-NEXT:    or $a0, $a1, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 2
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 4
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 8
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a6, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    and $a1, $a1, $a5
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -250,14 +269,17 @@ define i8 @test_not_ctlz_i8(i8 %a) nounwind {
 ; LA32R:       # %bb.0:
 ; LA32R-NEXT:    ori $a1, $zero, 254
 ; LA32R-NEXT:    andn $a1, $a1, $a0
+; LA32R-NEXT:    nor $a2, $a0, $zero
 ; LA32R-NEXT:    srli.w $a1, $a1, 1
+; LA32R-NEXT:    nor $a2, $a2, $a1
 ; LA32R-NEXT:    orn $a0, $a1, $a0
 ; LA32R-NEXT:    andi $a1, $a0, 252
 ; LA32R-NEXT:    srli.w $a1, $a1, 2
+; LA32R-NEXT:    andn $a2, $a2, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    andi $a1, $a0, 240
-; LA32R-NEXT:    srli.w $a1, $a1, 4
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    andi $a0, $a0, 240
+; LA32R-NEXT:    srli.w $a0, $a0, 4
+; LA32R-NEXT:    andn $a0, $a2, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    andi $a1, $a1, 85
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -293,19 +315,22 @@ define i16 @test_not_ctlz_i16(i16 %a) nounwind {
 ; LA32R-NEXT:    ori $a2, $a1, 4094
 ; LA32R-NEXT:    andn $a2, $a2, $a0
 ; LA32R-NEXT:    srli.w $a2, $a2, 1
+; LA32R-NEXT:    andn $a3, $a0, $a2
 ; LA32R-NEXT:    orn $a0, $a2, $a0
 ; LA32R-NEXT:    ori $a2, $a1, 4092
 ; LA32R-NEXT:    and $a2, $a0, $a2
 ; LA32R-NEXT:    srli.w $a2, $a2, 2
+; LA32R-NEXT:    andn $a3, $a3, $a2
 ; LA32R-NEXT:    or $a0, $a0, $a2
 ; LA32R-NEXT:    ori $a2, $a1, 4080
 ; LA32R-NEXT:    and $a2, $a0, $a2
 ; LA32R-NEXT:    srli.w $a2, $a2, 4
+; LA32R-NEXT:    andn $a3, $a3, $a2
 ; LA32R-NEXT:    or $a0, $a0, $a2
 ; LA32R-NEXT:    ori $a1, $a1, 3840
-; LA32R-NEXT:    and $a1, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a1, 8
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 8
+; LA32R-NEXT:    andn $a0, $a3, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 5
 ; LA32R-NEXT:    ori $a2, $a2, 1365
@@ -345,16 +370,20 @@ define i32 @test_not_ctlz_i32(i32 %a) nounwind {
 ; LA32R-LABEL: test_not_ctlz_i32:
 ; LA32R:       # %bb.0:
 ; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    srli.w $a1, $a1, 1
-; LA32R-NEXT:    orn $a0, $a1, $a0
-; LA32R-NEXT:    srli.w $a1, $a0, 2
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 4
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 8
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a2, $a1, 1
+; LA32R-NEXT:    nor $a1, $a1, $a2
+; LA32R-NEXT:    orn $a0, $a2, $a0
+; LA32R-NEXT:    srli.w $a2, $a0, 2
+; LA32R-NEXT:    andn $a1, $a1, $a2
+; LA32R-NEXT:    or $a0, $a0, $a2
+; LA32R-NEXT:    srli.w $a2, $a0, 4
+; LA32R-NEXT:    andn $a1, $a1, $a2
+; LA32R-NEXT:    or $a0, $a0, $a2
+; LA32R-NEXT:    srli.w $a2, $a0, 8
+; LA32R-NEXT:    andn $a1, $a1, $a2
+; LA32R-NEXT:    or $a0, $a0, $a2
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a1, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 349525
 ; LA32R-NEXT:    ori $a2, $a2, 1365
@@ -406,16 +435,20 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind {
 ; LA32R-NEXT:    bne $a6, $zero, .LBB7_2
 ; LA32R-NEXT:  # %bb.1:
 ; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    srli.w $a1, $a1, 1
-; LA32R-NEXT:    orn $a0, $a1, $a0
-; LA32R-NEXT:    srli.w $a1, $a0, 2
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 4
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 8
-; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a6, $a1, 1
+; LA32R-NEXT:    nor $a1, $a1, $a6
+; LA32R-NEXT:    orn $a0, $a6, $a0
+; LA32R-NEXT:    srli.w $a6, $a0, 2
+; LA32R-NEXT:    andn $a1, $a1, $a6
+; LA32R-NEXT:    or $a0, $a0, $a6
+; LA32R-NEXT:    srli.w $a6, $a0, 4
+; LA32R-NEXT:    andn $a1, $a1, $a6
+; LA32R-NEXT:    or $a0, $a0, $a6
+; LA32R-NEXT:    srli.w $a6, $a0, 8
+; LA32R-NEXT:    andn $a1, $a1, $a6
+; LA32R-NEXT:    or $a0, $a0, $a6
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a1, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    and $a1, $a1, $a5
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -433,15 +466,19 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind {
 ; LA32R-NEXT:    ret
 ; LA32R-NEXT:  .LBB7_2:
 ; LA32R-NEXT:    srli.w $a0, $a6, 1
+; LA32R-NEXT:    nor $a6, $a6, $a0
 ; LA32R-NEXT:    orn $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 2
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 4
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 8
+; LA32R-NEXT:    andn $a6, $a6, $a1
 ; LA32R-NEXT:    or $a0, $a0, $a1
-; LA32R-NEXT:    srli.w $a1, $a0, 16
-; LA32R-NEXT:    nor $a0, $a0, $a1
+; LA32R-NEXT:    srli.w $a0, $a0, 16
+; LA32R-NEXT:    andn $a0, $a6, $a0
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    and $a1, $a1, $a5
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
index 2c3bf944cdf89..9ff15f946d2d6 100644
--- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
+++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
@@ -11,25 +11,31 @@ define i128 @f1(i128 %a) {
 ; CHECK-NEXT:    vl %v0, 0(%r3), 3
 ; CHECK-NEXT:    vrepib %v1, 1
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vno %v2, %v0, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 2
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 4
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 8
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 16
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 32
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 64
-; CHECK-NEXT:    vsrlb %v1, %v0, %v1
-; CHECK-NEXT:    vno %v0, %v0, %v1
+; CHECK-NEXT:    vsrlb %v0, %v0, %v1
+; CHECK-NEXT:    vnc %v0, %v2, %v0
 ; CHECK-NEXT:    vpopct %v0, %v0, 0
 ; CHECK-NEXT:    vgbm %v1, 0
 ; CHECK-NEXT:    vsumb %v0, %v0, %v1
@@ -47,25 +53,31 @@ define i128 @f2(i128 %a) {
 ; CHECK-NEXT:    vl %v0, 0(%r3), 3
 ; CHECK-NEXT:    vrepib %v1, 1
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vno %v2, %v0, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 2
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 4
 ; CHECK-NEXT:    vsrl %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 8
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 16
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 32
 ; CHECK-NEXT:    vsrlb %v1, %v0, %v1
+; CHECK-NEXT:    vnc %v2, %v2, %v1
 ; CHECK-NEXT:    vo %v0, %v0, %v1
 ; CHECK-NEXT:    vrepib %v1, 64
-; CHECK-NEXT:    vsrlb %v1, %v0, %v1
-; CHECK-NEXT:    vno %v0, %v0, %v1
+; CHECK-NEXT:    vsrlb %v0, %v0, %v1
+; CHECK-NEXT:    vnc %v0, %v2, %v0
 ; CHECK-NEXT:    vpopct %v0, %v0, 0
 ; CHECK-NEXT:    vgbm %v1, 0
 ; CHECK-NEXT:    vsumb %v0, %v0, %v1
diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll
index bcdedcd3a407b..417fcb90af9a3 100644
--- a/llvm/test/CodeGen/SystemZ/vec-eval.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll
@@ -1889,7 +1889,9 @@ entry:
 define <16 x i8> @eval128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    veval %v24, %v26, %v24, %v28, 128
+; CHECK-NEXT:    vno %v0, %v24, %v24
+; CHECK-NEXT:    vno %v1, %v26, %v26
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 2
 ; CHECK-NEXT:    br %r14
 entry:
   %and.demorgan = or <16 x i8> %src2, %src1
@@ -1901,9 +1903,10 @@ entry:
 define <16 x i8> @eval129(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval129:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vo %v0, %v26, %v24
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
 ; CHECK-NEXT:    vn %v1, %v26, %v24
-; CHECK-NEXT:    veval %v24, %v1, %v28, %v0, 139
+; CHECK-NEXT:    vsel %v24, %v1, %v0, %v28
 ; CHECK-NEXT:    br %r14
 entry:
   %and.demorgan = or <16 x i8> %src2, %src1
@@ -2034,8 +2037,10 @@ entry:
 define <16 x i8> @eval138(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval138:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    veval %v0, %v26, %v24, %v28, 127
-; CHECK-NEXT:    veval %v24, %v24, %v28, %v0, 174
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT:    vnc %v1, %v24, %v28
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %not2 = xor <16 x i8> %src3, splat(i8 -1)
@@ -2050,9 +2055,10 @@ entry:
 define <16 x i8> @eval139(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval139:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vo %v0, %v26, %v24
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
 ; CHECK-NEXT:    veval %v1, %v24, %v26, %v28, 11
-; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %0 = or <16 x i8> %src2, %src1
@@ -2068,8 +2074,10 @@ entry:
 define <16 x i8> @eval140(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval140:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    veval %v0, %v24, %v28, %v26, 127
-; CHECK-NEXT:    veval %v24, %v24, %v26, %v0, 174
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v28, %v0, %v24, 40
+; CHECK-NEXT:    vnc %v1, %v24, %v26
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v26, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %not1 = xor <16 x i8> %src2, splat(i8 -1)
@@ -2084,10 +2092,11 @@ entry:
 define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval141:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vgbm %v0, 65535
 ; CHECK-NEXT:    veval %v1, %v26, %v24, %v28, 1
-; CHECK-NEXT:    vo %v0, %v26, %v24
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
 ; CHECK-NEXT:    veval %v1, %v1, %v24, %v26, 47
-; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %not1 = xor <16 x i8> %src2, splat(i8 -1)
@@ -2105,9 +2114,10 @@ entry:
 define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval142:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    veval %v0, %v26, %v24, %v28, 127
-; CHECK-NEXT:    vn %v1, %v28, %v26
-; CHECK-NEXT:    veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT:    veval %v1, %v24, %v28, %v26, 14
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %0 = or <16 x i8> %src2, %src1
@@ -2441,8 +2451,10 @@ entry:
 define <16 x i8> @eval162(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval162:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    veval %v0, %v28, %v24, %v26, 127
-; CHECK-NEXT:    veval %v24, %v26, %v28, %v0, 174
+; CHECK-NEXT:    vgbm %v1, 65535
+; CHECK-NEXT:    vno %v0, %v28, %v28
+; CHECK-NEXT:    veval %v1, %v24, %v1, %v28, 40
+; CHECK-NEXT:    vsel %v24, %v0, %v1, %v26
 ; CHECK-NEXT:    br %r14
 entry:
   %not2 = xor <16 x i8> %src3, splat(i8 -1)
@@ -2457,9 +2469,10 @@ entry:
 define <16 x i8> @eval163(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
 ; CHECK-LABEL: eval163:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vo %v0, %v26, %v24
+; CHECK-NEXT:    vgbm %v0, 65535
+; CHECK-NEXT:    veval %v0, %v24, %v0, %v26, 40
 ; CHECK-NEXT:    veval %v1, %v26, %v24, %v28, 11
-; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT:    veval %v24, %v1, %v0, %v28, 47
 ; CHECK-NEXT:    br %r14
 entry:
   %0 = or <16 x i8> %src2, %src1
>From 3681932bdb9559e53e4f31f4a53cf7af43f361e9 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 19:05:28 +0000
Subject: [PATCH 19/20] [AArch64][PowerPC]: Reverting some updates
---
 llvm/test/CodeGen/AArch64/eon.ll              |  9 +++
 llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll |  1 -
 .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll    | 20 ++----
 llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll  | 65 -------------------
 4 files changed, 14 insertions(+), 81 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll
index ea0e0122d9b6d..f939b4901be09 100644
--- a/llvm/test/CodeGen/AArch64/eon.ll
+++ b/llvm/test/CodeGen/AArch64/eon.ll
@@ -36,6 +36,10 @@ entry:
 
 ; Check that eon is generated if the xor is a disjoint or.
 define i64 @disjoint_or(i64 %a, i64 %b) {
+; CHECK-LABEL: disjoint_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eon x0, x0, x1
+; CHECK-NEXT:    ret
   %or = or disjoint i64 %a, %b
   %eon = xor i64 %or, -1
   ret i64 %eon
@@ -43,6 +47,11 @@ define i64 @disjoint_or(i64 %a, i64 %b) {
 
 ; Check that eon is *not* generated if the or is not disjoint.
 define i64 @normal_or(i64 %a, i64 %b) {
+; CHECK-LABEL: normal_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    mvn x0, x8
+; CHECK-NEXT:    ret
   %or = or i64 %a, %b
   %not = xor i64 %or, -1
   ret i64 %not
diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index ed8dc504f026a..bea24ee98336d 100644
--- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \
 ; RUN:   grep eqv | count 3
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index 310f0a66aa9b9..c23daac80279b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -1,39 +1,29 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; Check the miscellaneous logical vector operations added in P8
-;
+; 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; Test x eqv y
 define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_veqv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    veqv 2, 2, 3
-; CHECK-NEXT:    blr
        %tmp = xor <4 x i32> %x, %y
        %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
        ret <4 x i32> %ret_val
+; CHECK: veqv 2, 2, 3
 }
 
 ; Test x vnand y
 define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_vnand:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vnand 2, 2, 3
-; CHECK-NEXT:    blr
        %tmp = and <4 x i32> %x, %y
        %ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
        ret <4 x i32> %ret_val
+; CHECK: vnand 2, 2, 3
 }
 
 ; Test x vorc y and variants
 define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_vorc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vorc 3, 2, 3
-; CHECK-NEXT:    vorc 2, 2, 3
-; CHECK-NEXT:    blr
        %tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
        %tmp2 = or <4 x i32> %x, %tmp1
+; CHECK: vorc 3, 2, 3      
        %tmp3 = xor <4 x i32> %tmp2, <i32 -1, i32 -1, i32 -1, i32 -1>
        %tmp4 = or <4 x i32> %tmp3, %x
+; CHECK: vorc 2, 2, 3
        ret <4 x i32> %tmp4
 }
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
index 7f7a52fe7de65..ba74df956e71e 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
 
@@ -7,10 +6,6 @@
 ; CHECK:         xxlandc v2, v2, v3
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_not:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlandc v2, v2, v3
-; CHECK-NEXT:    blr
 entry:
   %neg = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
   %and = and <4 x i32> %neg, %A
@@ -22,10 +17,6 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT:    blr
 entry:
   %and = and <16 x i8> %B, %A
   %and1 = and <16 x i8> %and, %C
@@ -37,10 +28,6 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT:    blr
 entry:
   %and = and <8 x i16> %B, %A
   %and1 = and <8 x i16> %and, %C
@@ -52,10 +39,6 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %A
   %and1 = and <4 x i32> %and, %C
@@ -67,10 +50,6 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 1
 ; CHECK-NEXT:    blr
 define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %A
   %and1 = and <2 x i64> %and, %C
@@ -82,10 +61,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 14
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_nand:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 14
-; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %C, %B
   %neg = xor <4 x i32> %and, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -98,10 +73,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 7
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_or:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 7
-; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %and = and <4 x i32> %or, %A
@@ -113,10 +84,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 8
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_nor:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 8
-; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %neg = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -129,10 +96,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 6
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_xor:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 6
-; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %and = and <4 x i32> %xor, %A
@@ -144,10 +107,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v3, v4, 9
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_eqv:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 9
-; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
   %neg = xor <4 x i32> %xor, %C
@@ -160,10 +119,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 241
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_nand:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 241
-; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %C, %B
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -176,10 +131,6 @@ entry:
 ; CHECK:         xxeval v2, v3, v2, v4, 254
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_and:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v3, v2, v4, 254
-; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %A
   %and1 = and <4 x i32> %and, %C
@@ -192,10 +143,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 249
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_xor:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 249
-; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %and = and <4 x i32> %xor, %A
@@ -208,10 +155,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 246
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_eqv:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 246
-; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %C, %B
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -224,10 +167,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v4, v3, 248
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_or:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v4, v3, 248
-; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %C, %B
   %and = and <4 x i32> %or, %A
@@ -240,10 +179,6 @@ entry:
 ; CHECK:         xxeval v2, v2, v3, v4, 247
 ; CHECK-NEXT:    blr
 define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_nor:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 247
-; CHECK-NEXT:    blr
 entry:
   %A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
   %or = or <4 x i32> %A.not, %B
>From b3ec648bec97aa472e73a7cb4bcc93c8bbbfbf5d Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 19:06:41 +0000
Subject: [PATCH 20/20] [X86]: Removed comment
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 7 -------
 1 file changed, 7 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e870514db2443..16f6d31728717 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55615,13 +55615,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
 
   // Folds for better commutativity:
   if (N1->hasOneUse()) {
-    /*
-    // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
-    if (SDValue Not = IsNOT(N1, DAG))
-      return DAG.getNOT(
-          DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
-          */
-
     // ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x))
     // Zero out elements by setting the PSHUFB mask value to 0xFF.
     if (DAG.ComputeNumSignBits(N0) == EltSizeInBits) {
    
    
More information about the llvm-commits
mailing list