[llvm] [X86]: Rewrite demorgan rule for ANDN (PR #163789)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 22 12:07:18 PDT 2025
https://github.com/kper updated https://github.com/llvm/llvm-project/pull/163789
From ee35653e45cdfec53652118e9c93f0f12e597fbb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 13:53:37 +0000
Subject: [PATCH 01/20] [X86]: Reassoc demorgan rule for ANDN
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 28 ++++++
llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 98 +++++++++++++++++++
2 files changed, 126 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e2632d114ce0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,6 +51651,31 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
return AndN;
}
+// fold (not (or A, B)) -> andn(A, not(B)) if BMI
+static SDValue
+combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace llvm::SDPatternMatch;
+
+ EVT VT = Xor->getValueType(0);
+ // Make sure this node is a candidate for BMI instructions.
+ if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+ return SDValue();
+
+ SDValue A;
+ SDValue B;
+ APInt Cst;
+ if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+ Cst.isAllOnes()))
+ return SDValue();
+
+ auto Opcode =
+ Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
+ auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
+ return SDValue(AndN, 0);
+}
+
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -55150,6 +55175,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
return R;
+ if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
+ return R;
+
return combineFneg(N, DAG, DCI, Subtarget);
}
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
new file mode 100644
index 0000000000000..ea81d08cd2e6d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+
+define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: andnl %ecx, %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_i32:
+; X64: # %bb.0:
+; X64-NEXT: notl %edi
+; X64-NEXT: andnl %edi, %esi, %eax
+; X64-NEXT: retq
+ %temp = or i32 %b, %a
+ %res = xor i32 %temp, -1
+ ret i32 %res
+}
+
+define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: notl %eax
+; X86-NEXT: andnl %eax, %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64: # %bb.0:
+; X64-NEXT: orl %esi, %edi
+; X64-NEXT: notl %edx
+; X64-NEXT: andnl %edx, %edi, %eax
+; X64-NEXT: retq
+ %and.demorgan = or i32 %b, %a
+ %and3.demorgan = or i32 %and.demorgan, %c
+ %and3 = xor i32 %and3.demorgan, -1
+ ret i32 %and3
+}
+
+define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: notl %edx
+; X86-NEXT: andnl %edx, %eax, %eax
+; X86-NEXT: notl %esi
+; X86-NEXT: andnl %esi, %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_i64:
+; X64: # %bb.0:
+; X64-NEXT: notq %rdi
+; X64-NEXT: andnq %rdi, %rsi, %rax
+; X64-NEXT: retq
+ %temp = or i64 %b, %a
+ %res = xor i64 %temp, -1
+ ret i64 %res
+}
+
+define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: notl %eax
+; X86-NEXT: andnl %eax, %edx, %eax
+; X86-NEXT: notl %ecx
+; X86-NEXT: andnl %ecx, %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64: # %bb.0:
+; X64-NEXT: orq %rsi, %rdi
+; X64-NEXT: notq %rdx
+; X64-NEXT: andnq %rdx, %rdi, %rax
+; X64-NEXT: retq
+ %and.demorgan = or i64 %b, %a
+ %and3.demorgan = or i64 %and.demorgan, %c
+ %and3 = xor i64 %and3.demorgan, -1
+ ret i64 %and3
+}
From deda3383ab0015a23d521f37d44a5714def2346c Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:24:40 +0000
Subject: [PATCH 02/20] [X86]: Removed obsolete code
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 28 -------------------------
1 file changed, 28 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e2632d114ce0b..a0b64ff370b10 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,31 +51651,6 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
return AndN;
}
-// fold (not (or A, B)) -> andn(A, not(B)) if BMI
-static SDValue
-combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- using namespace llvm::SDPatternMatch;
-
- EVT VT = Xor->getValueType(0);
- // Make sure this node is a candidate for BMI instructions.
- if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
- return SDValue();
-
- SDValue A;
- SDValue B;
- APInt Cst;
- if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
- Cst.isAllOnes()))
- return SDValue();
-
- auto Opcode =
- Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
- auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
- return SDValue(AndN, 0);
-}
-
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -55175,9 +55150,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
return R;
- if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
- return R;
-
return combineFneg(N, DAG, DCI, Subtarget);
}
From cdb57ef348cbd3ac3bd6b361fd6a3b4ddb9ff8fb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:25:21 +0000
Subject: [PATCH 03/20] [DAG]: Reassoc demorgan rule for ANDN
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..0629b75989233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,6 +10197,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
+ // fold (not (or A, B)) -> and(not(A), not(B))
+ if (TLI.hasAndNot(SDValue(N, 0))) {
+ // If the target has an and-not instruction, it is profitable to apply
+ // De Morgan's law to make use of it.
+ SDValue A;
+ SDValue B;
+ APInt Cst;
+ if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+ Cst.isAllOnes()) {
+ return DAG.getNode(
+ ISD::AND, DL, VT,
+ DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+ }
+ }
+
return SDValue();
}
From 9e4103d5d49bbc92b1fdfad30e5e52f51f9c70e2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:35:14 +0000
Subject: [PATCH 04/20] [DAG]: Fixed type
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0629b75989233..5b77dc423b66b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10206,10 +10206,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
APInt Cst;
if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
Cst.isAllOnes()) {
+ auto Ty = N->getValueType(0);
return DAG.getNode(
ISD::AND, DL, VT,
- DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
- DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+ DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)),
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)));
}
}
From cf6ee582057a42e9ec8f5b81355c2bee8a8067cb Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 18:45:46 +0000
Subject: [PATCH 05/20] [X86]: Updated tests
---
llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 210 ++++++++++++------
1 file changed, 143 insertions(+), 67 deletions(-)
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
index ea81d08cd2e6d..7f3a376b24b2a 100644
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -1,42 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
-; X86-LABEL: reassoc_demorgan_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: andnl %ecx, %eax, %eax
-; X86-NEXT: retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_i32:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: notl %ecx
+; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT: retl
;
-; X64-LABEL: reassoc_demorgan_i32:
-; X64: # %bb.0:
-; X64-NEXT: notl %edi
-; X64-NEXT: andnl %edi, %esi, %eax
-; X64-NEXT: retq
+; X64-WITH-BMI-LABEL: reassoc_demorgan_i32:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notl %edi
+; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT: notl %eax
+; X64-WITHOUT-BMI-NEXT: retq
%temp = or i32 %b, %a
%res = xor i32 %temp, -1
ret i32 %res
}
define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
-; X86-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: notl %eax
-; X86-NEXT: andnl %eax, %ecx, %eax
-; X86-NEXT: retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: notl %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx
+; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notl %edi
+; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: retl
;
-; X64-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64: # %bb.0:
-; X64-NEXT: orl %esi, %edi
-; X64-NEXT: notl %edx
-; X64-NEXT: andnl %edx, %edi, %eax
-; X64-NEXT: retq
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %edx, %eax
+; X64-WITHOUT-BMI-NEXT: notl %eax
+; X64-WITHOUT-BMI-NEXT: retq
%and.demorgan = or i32 %b, %a
%and3.demorgan = or i32 %and.demorgan, %c
%and3 = xor i32 %and3.demorgan, -1
@@ -44,53 +77,96 @@ define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwin
}
define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
-; X86-LABEL: reassoc_demorgan_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: notl %edx
-; X86-NEXT: andnl %edx, %eax, %eax
-; X86-NEXT: notl %esi
-; X86-NEXT: andnl %esi, %ecx, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_i64:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: pushl %esi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT: notl %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT: notl %esi
+; X86-WITH-BMI-NEXT: andnl %esi, %ecx, %edx
+; X86-WITH-BMI-NEXT: popl %esi
+; X86-WITH-BMI-NEXT: retl
;
-; X64-LABEL: reassoc_demorgan_i64:
-; X64: # %bb.0:
-; X64-NEXT: notq %rdi
-; X64-NEXT: andnq %rdi, %rsi, %rax
-; X64-NEXT: retq
+; X64-WITH-BMI-LABEL: reassoc_demorgan_i64:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notq %rdi
+; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: notl %edx
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT: notq %rax
+; X64-WITHOUT-BMI-NEXT: retq
%temp = or i64 %b, %a
%res = xor i64 %temp, -1
ret i64 %res
}
define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
-; X86-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: notl %eax
-; X86-NEXT: andnl %eax, %edx, %eax
-; X86-NEXT: notl %ecx
-; X86-NEXT: andnl %ecx, %esi, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
+; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: pushl %ebx
+; X86-WITH-BMI-NEXT: pushl %edi
+; X86-WITH-BMI-NEXT: pushl %esi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-WITH-BMI-NEXT: notl %edi
+; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT: notl %ebx
+; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx
+; X86-WITH-BMI-NEXT: popl %esi
+; X86-WITH-BMI-NEXT: popl %edi
+; X86-WITH-BMI-NEXT: popl %ebx
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notq %rdi
+; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: notl %edx
+; X86-WITHOUT-BMI-NEXT: retl
;
-; X64-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64: # %bb.0:
-; X64-NEXT: orq %rsi, %rdi
-; X64-NEXT: notq %rdx
-; X64-NEXT: andnq %rdx, %rdi, %rax
-; X64-NEXT: retq
+; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax
+; X64-WITHOUT-BMI-NEXT: notq %rax
+; X64-WITHOUT-BMI-NEXT: retq
%and.demorgan = or i64 %b, %a
%and3.demorgan = or i64 %and.demorgan, %c
%and3 = xor i64 %and3.demorgan, -1
From cef0067d84a41aad176352f02d9585dd71355a71 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Thu, 16 Oct 2025 20:16:46 +0000
Subject: [PATCH 06/20] [DAG]: Updated tests
---
llvm/test/CodeGen/X86/andnot-patterns.ll | 184 ++++++++++++++---------
1 file changed, 116 insertions(+), 68 deletions(-)
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index fc573fbd4fc99..0701d7046fc35 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -761,6 +761,7 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
;
; X86-BMI-LABEL: andnot_bitreverse_i64:
; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: pushl %esi
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: bswapl %eax
@@ -774,13 +775,16 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %eax
; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT: leal (%eax,%edx,4), %eax
-; X86-BMI-NEXT: movl %eax, %edx
-; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT: leal (%eax,%edx,4), %esi
+; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-BMI-NEXT: addl %esi, %esi
+; X86-BMI-NEXT: shll $2, %edx
+; X86-BMI-NEXT: notl %edx
+; X86-BMI-NEXT: andnl %edx, %eax, %eax
; X86-BMI-NEXT: shrl %eax
-; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-BMI-NEXT: leal (%eax,%edx,2), %eax
-; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-BMI-NEXT: andnl %eax, %esi, %eax
+; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: bswapl %ecx
; X86-BMI-NEXT: movl %ecx, %edx
; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
@@ -792,13 +796,17 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %ecx
; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-BMI-NEXT: leal (%ecx,%edx,4), %ecx
-; X86-BMI-NEXT: movl %ecx, %edx
-; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT: leal (%ecx,%edx,4), %esi
+; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-BMI-NEXT: addl %esi, %esi
+; X86-BMI-NEXT: shll $2, %edx
+; X86-BMI-NEXT: notl %edx
+; X86-BMI-NEXT: andnl %edx, %ecx, %ecx
; X86-BMI-NEXT: shrl %ecx
-; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-BMI-NEXT: leal (%ecx,%edx,2), %ecx
-; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA
+; X86-BMI-NEXT: andnl %ecx, %esi, %edx
+; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT: popl %esi
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i64:
@@ -837,19 +845,23 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X64-BMI-NEXT: andq %rcx, %rsi
; X64-BMI-NEXT: shlq $4, %rsi
; X64-BMI-NEXT: orq %rax, %rsi
-; X64-BMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-BMI-NEXT: movq %rsi, %rcx
-; X64-BMI-NEXT: andq %rax, %rcx
-; X64-BMI-NEXT: shrq $2, %rsi
-; X64-BMI-NEXT: andq %rax, %rsi
-; X64-BMI-NEXT: leaq (%rsi,%rcx,4), %rax
-; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-BMI-NEXT: movq %rax, %rdx
-; X64-BMI-NEXT: andq %rcx, %rdx
-; X64-BMI-NEXT: shrq %rax
+; X64-BMI-NEXT: movq %rsi, %rax
+; X64-BMI-NEXT: shrq $2, %rax
+; X64-BMI-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; X64-BMI-NEXT: andq %rcx, %rax
-; X64-BMI-NEXT: leaq (%rax,%rdx,2), %rax
-; X64-BMI-NEXT: andnq %rdi, %rax, %rax
+; X64-BMI-NEXT: andq %rcx, %rsi
+; X64-BMI-NEXT: leaq (,%rsi,4), %rcx
+; X64-BMI-NEXT: notq %rcx
+; X64-BMI-NEXT: andnq %rcx, %rax, %rcx
+; X64-BMI-NEXT: shrq %rcx
+; X64-BMI-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
+; X64-BMI-NEXT: orq %rcx, %rdx
+; X64-BMI-NEXT: leaq (%rax,%rsi,4), %rax
+; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT: andq %rax, %rcx
+; X64-BMI-NEXT: addq %rcx, %rcx
+; X64-BMI-NEXT: andnq %rdx, %rcx, %rax
+; X64-BMI-NEXT: andq %rdi, %rax
; X64-BMI-NEXT: retq
%not = xor i64 %a1, -1
%bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
@@ -896,13 +908,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %eax
; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT: leal (%eax,%ecx,4), %eax
-; X86-BMI-NEXT: movl %eax, %ecx
-; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx
+; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT: addl %edx, %edx
+; X86-BMI-NEXT: shll $2, %ecx
+; X86-BMI-NEXT: notl %ecx
+; X86-BMI-NEXT: andnl %ecx, %eax, %eax
; X86-BMI-NEXT: shrl %eax
-; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-BMI-NEXT: leal (%eax,%ecx,2), %eax
-; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-BMI-NEXT: andnl %eax, %edx, %eax
+; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i32:
@@ -940,16 +955,19 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
; X64-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X64-BMI-NEXT: orl %eax, %esi
; X64-BMI-NEXT: movl %esi, %eax
+; X64-BMI-NEXT: shrl $2, %eax
; X64-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-BMI-NEXT: shrl $2, %esi
; X64-BMI-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax
-; X64-BMI-NEXT: movl %eax, %ecx
-; X64-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X64-BMI-NEXT: shrl %eax
+; X64-BMI-NEXT: leal (,%rsi,4), %ecx
+; X64-BMI-NEXT: notl %ecx
+; X64-BMI-NEXT: andnl %ecx, %eax, %ecx
+; X64-BMI-NEXT: shrl %ecx
+; X64-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA
+; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax
; X64-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax
-; X64-BMI-NEXT: andnl %edi, %eax, %eax
+; X64-BMI-NEXT: addl %eax, %eax
+; X64-BMI-NEXT: andnl %ecx, %eax, %eax
+; X64-BMI-NEXT: andl %edi, %eax
; X64-BMI-NEXT: retq
%not = xor i32 %a1, -1
%bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
@@ -958,30 +976,57 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
}
define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i16:
-; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $3855, %eax # imm = 0xF0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $13107, %ecx # imm = 0x3333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $13107, %eax # imm = 0x3333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $21845, %ecx # imm = 0x5555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $21845, %eax # imm = 0x5555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: andnot_bitreverse_i16:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: rolw $8, %ax
+; X86-NOBMI-NEXT: movl %eax, %ecx
+; X86-NOBMI-NEXT: andl $3855, %ecx # imm = 0xF0F
+; X86-NOBMI-NEXT: shll $4, %ecx
+; X86-NOBMI-NEXT: shrl $4, %eax
+; X86-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F
+; X86-NOBMI-NEXT: orl %ecx, %eax
+; X86-NOBMI-NEXT: movl %eax, %ecx
+; X86-NOBMI-NEXT: andl $13107, %ecx # imm = 0x3333
+; X86-NOBMI-NEXT: shrl $2, %eax
+; X86-NOBMI-NEXT: andl $13107, %eax # imm = 0x3333
+; X86-NOBMI-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NOBMI-NEXT: movl %eax, %ecx
+; X86-NOBMI-NEXT: andl $21845, %ecx # imm = 0x5555
+; X86-NOBMI-NEXT: shrl %eax
+; X86-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555
+; X86-NOBMI-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT: notl %eax
+; X86-NOBMI-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI-NEXT: retl
+;
+; X86-BMI-LABEL: andnot_bitreverse_i16:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: rolw $8, %ax
+; X86-BMI-NEXT: movl %eax, %ecx
+; X86-BMI-NEXT: andl $3855, %ecx # imm = 0xF0F
+; X86-BMI-NEXT: shll $4, %ecx
+; X86-BMI-NEXT: shrl $4, %eax
+; X86-BMI-NEXT: andl $3855, %eax # imm = 0xF0F
+; X86-BMI-NEXT: orl %ecx, %eax
+; X86-BMI-NEXT: movl %eax, %ecx
+; X86-BMI-NEXT: andl $13107, %ecx # imm = 0x3333
+; X86-BMI-NEXT: shrl $2, %eax
+; X86-BMI-NEXT: andl $13107, %eax # imm = 0x3333
+; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx
+; X86-BMI-NEXT: andl $21845, %edx # imm = 0x5555
+; X86-BMI-NEXT: addl %edx, %edx
+; X86-BMI-NEXT: shll $2, %ecx
+; X86-BMI-NEXT: notl %ecx
+; X86-BMI-NEXT: andnl %ecx, %eax, %eax
+; X86-BMI-NEXT: shrl %eax
+; X86-BMI-NEXT: orl $43690, %eax # imm = 0xAAAA
+; X86-BMI-NEXT: andnl %eax, %edx, %eax
+; X86-BMI-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i16:
; X64-NOBMI: # %bb.0:
@@ -1019,16 +1064,19 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
; X64-BMI-NEXT: andl $3855, %esi # imm = 0xF0F
; X64-BMI-NEXT: orl %eax, %esi
; X64-BMI-NEXT: movl %esi, %eax
+; X64-BMI-NEXT: shrl $2, %eax
; X64-BMI-NEXT: andl $13107, %eax # imm = 0x3333
-; X64-BMI-NEXT: shrl $2, %esi
; X64-BMI-NEXT: andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax
-; X64-BMI-NEXT: movl %eax, %ecx
-; X64-BMI-NEXT: andl $21845, %ecx # imm = 0x5555
-; X64-BMI-NEXT: shrl %eax
+; X64-BMI-NEXT: leal (,%rsi,4), %ecx
+; X64-BMI-NEXT: notl %ecx
+; X64-BMI-NEXT: andnl %ecx, %eax, %ecx
+; X64-BMI-NEXT: shrl %ecx
+; X64-BMI-NEXT: orl $-21846, %ecx # imm = 0xAAAA
+; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax
; X64-BMI-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax
-; X64-BMI-NEXT: andnl %edi, %eax, %eax
+; X64-BMI-NEXT: addl %eax, %eax
+; X64-BMI-NEXT: andnl %ecx, %eax, %eax
+; X64-BMI-NEXT: andl %edi, %eax
; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI-NEXT: retq
%not = xor i16 %a1, -1
From e31e5ab5bc605bd83d9090b207fabf7094294679 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Fri, 17 Oct 2025 05:08:17 +0000
Subject: [PATCH 07/20] [DAG]: Updated tests
---
.../AArch64/neon-compare-instructions.ll | 264 ++++++++++++------
llvm/test/CodeGen/PowerPC/vsx.ll | 62 ++--
2 files changed, 219 insertions(+), 107 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 11b3b62ec1c8d..60c6d84679451 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2217,13 +2217,21 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmuno2xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mvn v1.8b, v2.8b
+; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2231,13 +2239,21 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmuno4xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2245,13 +2261,21 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmuno2xdouble:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -2259,13 +2283,21 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmueq2xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueq2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mvn v1.8b, v2.8b
+; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueq2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2273,13 +2305,21 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmueq4xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueq4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueq4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2287,13 +2327,21 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmueq2xdouble:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueq2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueq2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -2792,13 +2840,21 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
-; CHECK-LABEL: fcmueqz2xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v1.2s, v0.2s, #0.0
-; CHECK-NEXT: fcmlt v0.2s, v0.2s, #0.0
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueqz2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmlt v1.2s, v0.2s, #0.0
+; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, #0.0
+; CHECK-SD-NEXT: mvn v1.8b, v1.8b
+; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueqz2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v1.2s, v0.2s, #0.0
+; CHECK-GI-NEXT: fcmlt v0.2s, v0.2s, #0.0
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2806,13 +2862,21 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
-; CHECK-LABEL: fcmueqz4xfloat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v1.4s, v0.4s, #0.0
-; CHECK-NEXT: fcmlt v0.4s, v0.4s, #0.0
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueqz4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmlt v1.4s, v0.4s, #0.0
+; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, #0.0
+; CHECK-SD-NEXT: mvn v1.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueqz4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v1.4s, v0.4s, #0.0
+; CHECK-GI-NEXT: fcmlt v0.4s, v0.4s, #0.0
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2820,13 +2884,21 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
-; CHECK-LABEL: fcmueqz2xdouble:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmgt v1.2d, v0.2d, #0.0
-; CHECK-NEXT: fcmlt v0.2d, v0.2d, #0.0
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmueqz2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmlt v1.2d, v0.2d, #0.0
+; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, #0.0
+; CHECK-SD-NEXT: mvn v1.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmueqz2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmgt v1.2d, v0.2d, #0.0
+; CHECK-GI-NEXT: fcmlt v0.2d, v0.2d, #0.0
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -3286,39 +3358,63 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmuno2xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mvn v1.8b, v2.8b
+; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmuno4xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno4xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno4xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmuno2xdouble_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xdouble_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xdouble_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 14b3d69f8c273..3cde26271d50e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -378,23 +378,27 @@ entry:
define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor v2, v2, v3
+; CHECK-NEXT: xxlnor vs0, v2, v2
+; CHECK-NEXT: xxlandc v2, vs0, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test14:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor v2, v2, v3
+; CHECK-REG-NEXT: xxlnor vs0, v2, v2
+; CHECK-REG-NEXT: xxlandc v2, vs0, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test14:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor vs0, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v3
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test14:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor v2, v2, v3
+; CHECK-LE-NEXT: xxlnor vs0, v2, v2
+; CHECK-LE-NEXT: xxlandc v2, vs0, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <4 x i32> %a, %b
@@ -408,23 +412,27 @@ entry:
define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test15:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor v2, v2, v3
+; CHECK-NEXT: xxlnor vs0, v2, v2
+; CHECK-NEXT: xxlandc v2, vs0, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test15:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor v2, v2, v3
+; CHECK-REG-NEXT: xxlnor vs0, v2, v2
+; CHECK-REG-NEXT: xxlandc v2, vs0, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test15:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v3
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test15:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor v2, v2, v3
+; CHECK-LE-NEXT: xxlnor vs0, v2, v2
+; CHECK-LE-NEXT: xxlandc v2, vs0, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <8 x i16> %a, %b
@@ -438,23 +446,27 @@ entry:
define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor v2, v2, v3
+; CHECK-NEXT: xxlnor vs0, v2, v2
+; CHECK-NEXT: xxlandc v2, vs0, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test16:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor v2, v2, v3
+; CHECK-REG-NEXT: xxlnor vs0, v2, v2
+; CHECK-REG-NEXT: xxlandc v2, vs0, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test16:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor v2, v2, v3
+; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
+; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test16:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor v2, v2, v3
+; CHECK-LE-NEXT: xxlnor vs0, v2, v2
+; CHECK-LE-NEXT: xxlandc v2, vs0, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <16 x i8> %a, %b
@@ -624,34 +636,38 @@ entry:
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-NEXT: xxlor vs0, vs1, vs0
-; CHECK-NEXT: xxsel v2, v2, v3, vs0
+; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-NEXT: xxlnor vs1, vs1, vs1
+; CHECK-NEXT: xxlandc vs0, vs1, vs0
+; CHECK-NEXT: xxsel v2, v3, v2, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
-; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
+; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1
+; CHECK-REG-NEXT: xxlandc vs0, vs1, vs0
+; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
-; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
+; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs0
+; CHECK-FISL-NEXT: xxlandc vs0, vs0, vs1
+; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
-; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
+; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1
+; CHECK-LE-NEXT: xxlandc vs0, vs1, vs0
+; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
; CHECK-LE-NEXT: blr
entry:
%m = fcmp ueq <4 x float> %c, %d
From cf9da8380eacff36dd4173c2afd7e39e36c1e50a Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:22:07 +0000
Subject: [PATCH 08/20] Revert "[DAG]: Updated tests"
This reverts commit e31e5ab5bc605bd83d9090b207fabf7094294679.
---
.../AArch64/neon-compare-instructions.ll | 264 ++++++------------
llvm/test/CodeGen/PowerPC/vsx.ll | 62 ++--
2 files changed, 107 insertions(+), 219 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 60c6d84679451..11b3b62ec1c8d 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2217,21 +2217,13 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmuno2xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: mvn v1.8b, v2.8b
-; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno2xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
%tmp3 = fcmp uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2239,21 +2231,13 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmuno4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2261,21 +2245,13 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmuno2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -2283,21 +2259,13 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmueq2xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: mvn v1.8b, v2.8b
-; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueq2xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueq2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2305,21 +2273,13 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmueq4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueq4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueq4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2327,21 +2287,13 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmueq2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueq2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueq2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -2840,21 +2792,13 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
-; CHECK-SD-LABEL: fcmueqz2xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmlt v1.2s, v0.2s, #0.0
-; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, #0.0
-; CHECK-SD-NEXT: mvn v1.8b, v1.8b
-; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueqz2xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v1.2s, v0.2s, #0.0
-; CHECK-GI-NEXT: fcmlt v0.2s, v0.2s, #0.0
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueqz2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v1.2s, v0.2s, #0.0
+; CHECK-NEXT: fcmlt v0.2s, v0.2s, #0.0
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -2862,21 +2806,13 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
-; CHECK-SD-LABEL: fcmueqz4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmlt v1.4s, v0.4s, #0.0
-; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, #0.0
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueqz4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v1.4s, v0.4s, #0.0
-; CHECK-GI-NEXT: fcmlt v0.4s, v0.4s, #0.0
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueqz4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v1.4s, v0.4s, #0.0
+; CHECK-NEXT: fcmlt v0.4s, v0.4s, #0.0
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -2884,21 +2820,13 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
; UEQ with zero = !ONE = !(OLT |OGT)
define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
-; CHECK-SD-LABEL: fcmueqz2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmlt v1.2d, v0.2d, #0.0
-; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, #0.0
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmueqz2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmgt v1.2d, v0.2d, #0.0
-; CHECK-GI-NEXT: fcmlt v0.2d, v0.2d, #0.0
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmueqz2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmgt v1.2d, v0.2d, #0.0
+; CHECK-NEXT: fcmlt v0.2d, v0.2d, #0.0
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -3358,63 +3286,39 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-SD-LABEL: fcmuno2xfloat_fast:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: mvn v1.8b, v2.8b
-; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno2xfloat_fast:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno2xfloat_fast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
%tmp3 = fcmp fast uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-SD-LABEL: fcmuno4xfloat_fast:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno4xfloat_fast:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno4xfloat_fast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp fast uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-SD-LABEL: fcmuno2xdouble_fast:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fcmuno2xdouble_fast:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fcmuno2xdouble_fast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = fcmp fast uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 3cde26271d50e..14b3d69f8c273 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -378,27 +378,23 @@ entry:
define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v2, v2
-; CHECK-NEXT: xxlandc v2, vs0, v3
+; CHECK-NEXT: xxlnor v2, v2, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test14:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor vs0, v2, v2
-; CHECK-REG-NEXT: xxlandc v2, vs0, v3
+; CHECK-REG-NEXT: xxlnor v2, v2, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test14:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor vs0, v2, v3
-; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT: xxlnor v2, v2, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test14:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor vs0, v2, v2
-; CHECK-LE-NEXT: xxlandc v2, vs0, v3
+; CHECK-LE-NEXT: xxlnor v2, v2, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <4 x i32> %a, %b
@@ -412,27 +408,23 @@ entry:
define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test15:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v2, v2
-; CHECK-NEXT: xxlandc v2, vs0, v3
+; CHECK-NEXT: xxlnor v2, v2, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test15:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor vs0, v2, v2
-; CHECK-REG-NEXT: xxlandc v2, vs0, v3
+; CHECK-REG-NEXT: xxlnor v2, v2, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test15:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT: xxlnor v2, v2, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test15:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor vs0, v2, v2
-; CHECK-LE-NEXT: xxlandc v2, vs0, v3
+; CHECK-LE-NEXT: xxlnor v2, v2, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <8 x i16> %a, %b
@@ -446,27 +438,23 @@ entry:
define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v2, v2
-; CHECK-NEXT: xxlandc v2, vs0, v3
+; CHECK-NEXT: xxlnor v2, v2, v3
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test16:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xxlnor vs0, v2, v2
-; CHECK-REG-NEXT: xxlandc v2, vs0, v3
+; CHECK-REG-NEXT: xxlnor v2, v2, v3
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test16:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor v4, v2, v3
-; CHECK-FISL-NEXT: xxlnor vs0, v2, v2
-; CHECK-FISL-NEXT: xxlandc v2, vs0, v3
+; CHECK-FISL-NEXT: xxlnor v2, v2, v3
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test16:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xxlnor vs0, v2, v2
-; CHECK-LE-NEXT: xxlandc v2, vs0, v3
+; CHECK-LE-NEXT: xxlnor v2, v2, v3
; CHECK-LE-NEXT: blr
entry:
%v = or <16 x i8> %a, %b
@@ -636,38 +624,34 @@ entry:
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
-; CHECK-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-NEXT: xxlandc vs0, vs1, vs0
-; CHECK-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-NEXT: xxlor vs0, vs1, vs0
+; CHECK-NEXT: xxsel v2, v2, v3, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
-; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-REG-NEXT: xxlandc vs0, vs1, vs0
-; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
+; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs0
-; CHECK-FISL-NEXT: xxlandc vs0, vs0, vs1
-; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
+; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
-; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-LE-NEXT: xxlandc vs0, vs1, vs0
-; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
+; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
; CHECK-LE-NEXT: blr
entry:
%m = fcmp ueq <4 x float> %c, %d
>From 3a90a6990a0970b81fd30e67400074515bc2dbb2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:22:21 +0000
Subject: [PATCH 09/20] Revert "[DAG]: Updated tests"
This reverts commit cef0067d84a41aad176352f02d9585dd71355a71.
---
llvm/test/CodeGen/X86/andnot-patterns.ll | 184 +++++++++--------------
1 file changed, 68 insertions(+), 116 deletions(-)
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index 0701d7046fc35..fc573fbd4fc99 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -761,7 +761,6 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
;
; X86-BMI-LABEL: andnot_bitreverse_i64:
; X86-BMI: # %bb.0:
-; X86-BMI-NEXT: pushl %esi
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: bswapl %eax
@@ -775,16 +774,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %eax
; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT: leal (%eax,%edx,4), %esi
-; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; X86-BMI-NEXT: addl %esi, %esi
-; X86-BMI-NEXT: shll $2, %edx
-; X86-BMI-NEXT: notl %edx
-; X86-BMI-NEXT: andnl %edx, %eax, %eax
+; X86-BMI-NEXT: leal (%eax,%edx,4), %eax
+; X86-BMI-NEXT: movl %eax, %edx
+; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-BMI-NEXT: shrl %eax
-; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-BMI-NEXT: andnl %eax, %esi, %eax
-; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT: leal (%eax,%edx,2), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: bswapl %ecx
; X86-BMI-NEXT: movl %ecx, %edx
; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
@@ -796,17 +792,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %ecx
; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-BMI-NEXT: leal (%ecx,%edx,4), %esi
-; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; X86-BMI-NEXT: addl %esi, %esi
-; X86-BMI-NEXT: shll $2, %edx
-; X86-BMI-NEXT: notl %edx
-; X86-BMI-NEXT: andnl %edx, %ecx, %ecx
+; X86-BMI-NEXT: leal (%ecx,%edx,4), %ecx
+; X86-BMI-NEXT: movl %ecx, %edx
+; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-BMI-NEXT: shrl %ecx
-; X86-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X86-BMI-NEXT: andnl %ecx, %esi, %edx
-; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT: popl %esi
+; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT: leal (%ecx,%edx,2), %ecx
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i64:
@@ -845,23 +837,19 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
; X64-BMI-NEXT: andq %rcx, %rsi
; X64-BMI-NEXT: shlq $4, %rsi
; X64-BMI-NEXT: orq %rax, %rsi
-; X64-BMI-NEXT: movq %rsi, %rax
-; X64-BMI-NEXT: shrq $2, %rax
-; X64-BMI-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-BMI-NEXT: andq %rcx, %rax
-; X64-BMI-NEXT: andq %rcx, %rsi
-; X64-BMI-NEXT: leaq (,%rsi,4), %rcx
-; X64-BMI-NEXT: notq %rcx
-; X64-BMI-NEXT: andnq %rcx, %rax, %rcx
-; X64-BMI-NEXT: shrq %rcx
-; X64-BMI-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
-; X64-BMI-NEXT: orq %rcx, %rdx
-; X64-BMI-NEXT: leaq (%rax,%rsi,4), %rax
-; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BMI-NEXT: movq %rsi, %rcx
; X64-BMI-NEXT: andq %rax, %rcx
-; X64-BMI-NEXT: addq %rcx, %rcx
-; X64-BMI-NEXT: andnq %rdx, %rcx, %rax
-; X64-BMI-NEXT: andq %rdi, %rax
+; X64-BMI-NEXT: shrq $2, %rsi
+; X64-BMI-NEXT: andq %rax, %rsi
+; X64-BMI-NEXT: leaq (%rsi,%rcx,4), %rax
+; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT: movq %rax, %rdx
+; X64-BMI-NEXT: andq %rcx, %rdx
+; X64-BMI-NEXT: shrq %rax
+; X64-BMI-NEXT: andq %rcx, %rax
+; X64-BMI-NEXT: leaq (%rax,%rdx,2), %rax
+; X64-BMI-NEXT: andnq %rdi, %rax, %rax
; X64-BMI-NEXT: retq
%not = xor i64 %a1, -1
%bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
@@ -908,16 +896,13 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-BMI-NEXT: shrl $2, %eax
; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx
-; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X86-BMI-NEXT: addl %edx, %edx
-; X86-BMI-NEXT: shll $2, %ecx
-; X86-BMI-NEXT: notl %ecx
-; X86-BMI-NEXT: andnl %ecx, %eax, %eax
+; X86-BMI-NEXT: leal (%eax,%ecx,4), %eax
+; X86-BMI-NEXT: movl %eax, %ecx
+; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-BMI-NEXT: shrl %eax
-; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-BMI-NEXT: andnl %eax, %edx, %eax
-; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT: leal (%eax,%ecx,2), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i32:
@@ -955,19 +940,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
; X64-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X64-BMI-NEXT: orl %eax, %esi
; X64-BMI-NEXT: movl %esi, %eax
-; X64-BMI-NEXT: shrl $2, %eax
; X64-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-BMI-NEXT: shrl $2, %esi
; X64-BMI-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X64-BMI-NEXT: leal (,%rsi,4), %ecx
-; X64-BMI-NEXT: notl %ecx
-; X64-BMI-NEXT: andnl %ecx, %eax, %ecx
-; X64-BMI-NEXT: shrl %ecx
-; X64-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA
-; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax
+; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT: movl %eax, %ecx
+; X64-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X64-BMI-NEXT: shrl %eax
; X64-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-BMI-NEXT: addl %eax, %eax
-; X64-BMI-NEXT: andnl %ecx, %eax, %eax
-; X64-BMI-NEXT: andl %edi, %eax
+; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT: andnl %edi, %eax, %eax
; X64-BMI-NEXT: retq
%not = xor i32 %a1, -1
%bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
@@ -976,57 +958,30 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
}
define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
-; X86-NOBMI-LABEL: andnot_bitreverse_i16:
-; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: rolw $8, %ax
-; X86-NOBMI-NEXT: movl %eax, %ecx
-; X86-NOBMI-NEXT: andl $3855, %ecx # imm = 0xF0F
-; X86-NOBMI-NEXT: shll $4, %ecx
-; X86-NOBMI-NEXT: shrl $4, %eax
-; X86-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F
-; X86-NOBMI-NEXT: orl %ecx, %eax
-; X86-NOBMI-NEXT: movl %eax, %ecx
-; X86-NOBMI-NEXT: andl $13107, %ecx # imm = 0x3333
-; X86-NOBMI-NEXT: shrl $2, %eax
-; X86-NOBMI-NEXT: andl $13107, %eax # imm = 0x3333
-; X86-NOBMI-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NOBMI-NEXT: movl %eax, %ecx
-; X86-NOBMI-NEXT: andl $21845, %ecx # imm = 0x5555
-; X86-NOBMI-NEXT: shrl %eax
-; X86-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555
-; X86-NOBMI-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andw {{[0-9]+}}(%esp), %ax
-; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NOBMI-NEXT: retl
-;
-; X86-BMI-LABEL: andnot_bitreverse_i16:
-; X86-BMI: # %bb.0:
-; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT: rolw $8, %ax
-; X86-BMI-NEXT: movl %eax, %ecx
-; X86-BMI-NEXT: andl $3855, %ecx # imm = 0xF0F
-; X86-BMI-NEXT: shll $4, %ecx
-; X86-BMI-NEXT: shrl $4, %eax
-; X86-BMI-NEXT: andl $3855, %eax # imm = 0xF0F
-; X86-BMI-NEXT: orl %ecx, %eax
-; X86-BMI-NEXT: movl %eax, %ecx
-; X86-BMI-NEXT: andl $13107, %ecx # imm = 0x3333
-; X86-BMI-NEXT: shrl $2, %eax
-; X86-BMI-NEXT: andl $13107, %eax # imm = 0x3333
-; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx
-; X86-BMI-NEXT: andl $21845, %edx # imm = 0x5555
-; X86-BMI-NEXT: addl %edx, %edx
-; X86-BMI-NEXT: shll $2, %ecx
-; X86-BMI-NEXT: notl %ecx
-; X86-BMI-NEXT: andnl %ecx, %eax, %eax
-; X86-BMI-NEXT: shrl %eax
-; X86-BMI-NEXT: orl $43690, %eax # imm = 0xAAAA
-; X86-BMI-NEXT: andnl %eax, %edx, %eax
-; X86-BMI-NEXT: andw {{[0-9]+}}(%esp), %ax
-; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-BMI-NEXT: retl
+; X86-LABEL: andnot_bitreverse_i16:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rolw $8, %ax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
+; X86-NEXT: shll $4, %ecx
+; X86-NEXT: shrl $4, %eax
+; X86-NEXT: andl $3855, %eax # imm = 0xF0F
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $13107, %ecx # imm = 0x3333
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $13107, %eax # imm = 0x3333
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $21845, %ecx # imm = 0x5555
+; X86-NEXT: shrl %eax
+; X86-NEXT: andl $21845, %eax # imm = 0x5555
+; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: notl %eax
+; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
;
; X64-NOBMI-LABEL: andnot_bitreverse_i16:
; X64-NOBMI: # %bb.0:
@@ -1064,19 +1019,16 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
; X64-BMI-NEXT: andl $3855, %esi # imm = 0xF0F
; X64-BMI-NEXT: orl %eax, %esi
; X64-BMI-NEXT: movl %esi, %eax
-; X64-BMI-NEXT: shrl $2, %eax
; X64-BMI-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-BMI-NEXT: shrl $2, %esi
; X64-BMI-NEXT: andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT: leal (,%rsi,4), %ecx
-; X64-BMI-NEXT: notl %ecx
-; X64-BMI-NEXT: andnl %ecx, %eax, %ecx
-; X64-BMI-NEXT: shrl %ecx
-; X64-BMI-NEXT: orl $-21846, %ecx # imm = 0xAAAA
-; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax
+; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT: movl %eax, %ecx
+; X64-BMI-NEXT: andl $21845, %ecx # imm = 0x5555
+; X64-BMI-NEXT: shrl %eax
; X64-BMI-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-BMI-NEXT: addl %eax, %eax
-; X64-BMI-NEXT: andnl %ecx, %eax, %eax
-; X64-BMI-NEXT: andl %edi, %eax
+; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT: andnl %edi, %eax, %eax
; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI-NEXT: retq
%not = xor i16 %a1, -1
>From 1245b6b33393488eab867affc2b8c59693c26ecf Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:53:43 +0000
Subject: [PATCH 10/20] [DAG]: Rewrite `~(a | b | c)` into `~a & ~b & ~c`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5b77dc423b66b..fba8b62f5ca35 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,20 +10197,26 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
- // fold (not (or A, B)) -> and(not(A), not(B))
+ // fold (not (or A, (or B, C))) -> and(not(A), and(not(B), not(C)))
if (TLI.hasAndNot(SDValue(N, 0))) {
// If we have AndNot then it is profitable to apply demorgan to make use
// of the machine instruction.
SDValue A;
SDValue B;
+ SDValue C;
APInt Cst;
- if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+ if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) &&
Cst.isAllOnes()) {
auto Ty = N->getValueType(0);
+
+ auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
+ auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
+ auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
+
return DAG.getNode(
ISD::AND, DL, VT,
- DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)),
- DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)));
+ NegA,
+ DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
}
}
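A rough illustration of what this combine targets (a sketch, not part of the patch; the function and value names are invented for the example): given a not of a nested or in IR, targets where `hasAndNot` returns true should now select an ANDN chain, as the new X86 test added below demonstrates.
  ; minimal sketch, assuming a BMI-enabled x86-64 target
  define i32 @demorgan_sketch(i32 %a, i32 %b, i32 %c) {
    %or.inner = or i32 %b, %c          ; (or B, C)
    %or.outer = or i32 %a, %or.inner   ; (or A, (or B, C))
    %not = xor i32 %or.outer, -1       ; ~(a | b | c)
    ; expected combine: and(not(A), and(not(B), not(C))),
    ; selectable as one NOT plus two ANDN instructions
    ret i32 %not
  }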
>From 746b101885509bddc2f6ad3460a09eaf61de99ae Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:54:42 +0000
Subject: [PATCH 11/20] [X86]: Created new test
---
llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll | 171 ++++++++++++++++++
1 file changed, 171 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
diff --git a/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
new file mode 100644
index 0000000000000..a1ace1b6ca157
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
+
+define i32 @not_rewrite_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i32:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: notl %eax
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i32:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: movl %edi, %eax
+; X64-WITH-BMI-NEXT: orl %esi, %eax
+; X64-WITH-BMI-NEXT: notl %eax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT: notl %eax
+; X64-WITHOUT-BMI-NEXT: retq
+ %temp = or i32 %b, %a
+ %res = xor i32 %temp, -1
+ ret i32 %res
+}
+
+define i32 @rewrite_demorgan_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-WITH-BMI-LABEL: rewrite_demorgan_i32:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: notl %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx
+; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: rewrite_demorgan_i32:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notl %edi
+; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
+; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: rewrite_demorgan_i32:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i32:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
+; X64-WITHOUT-BMI-NEXT: orl %edx, %eax
+; X64-WITHOUT-BMI-NEXT: notl %eax
+; X64-WITHOUT-BMI-NEXT: retq
+ %and.demorgan = or i32 %b, %a
+ %and3.demorgan = or i32 %and.demorgan, %c
+ %and3 = xor i32 %and3.demorgan, -1
+ ret i32 %and3
+}
+
+define i64 @not_rewrite_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i64:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: notl %eax
+; X86-WITH-BMI-NEXT: notl %edx
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i64:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: movq %rdi, %rax
+; X64-WITH-BMI-NEXT: orq %rsi, %rax
+; X64-WITH-BMI-NEXT: notq %rax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: notl %edx
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT: notq %rax
+; X64-WITHOUT-BMI-NEXT: retq
+ %temp = or i64 %b, %a
+ %res = xor i64 %temp, -1
+ ret i64 %res
+}
+
+define i64 @rewrite_demorgan_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-WITH-BMI-LABEL: rewrite_demorgan_i64:
+; X86-WITH-BMI: # %bb.0:
+; X86-WITH-BMI-NEXT: pushl %ebx
+; X86-WITH-BMI-NEXT: pushl %edi
+; X86-WITH-BMI-NEXT: pushl %esi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-WITH-BMI-NEXT: notl %edi
+; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
+; X86-WITH-BMI-NEXT: notl %ebx
+; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx
+; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx
+; X86-WITH-BMI-NEXT: popl %esi
+; X86-WITH-BMI-NEXT: popl %edi
+; X86-WITH-BMI-NEXT: popl %ebx
+; X86-WITH-BMI-NEXT: retl
+;
+; X64-WITH-BMI-LABEL: rewrite_demorgan_i64:
+; X64-WITH-BMI: # %bb.0:
+; X64-WITH-BMI-NEXT: notq %rdi
+; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
+; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax
+; X64-WITH-BMI-NEXT: retq
+;
+; X86-WITHOUT-BMI-LABEL: rewrite_demorgan_i64:
+; X86-WITHOUT-BMI: # %bb.0:
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-WITHOUT-BMI-NEXT: notl %eax
+; X86-WITHOUT-BMI-NEXT: notl %edx
+; X86-WITHOUT-BMI-NEXT: retl
+;
+; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i64:
+; X64-WITHOUT-BMI: # %bb.0:
+; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
+; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax
+; X64-WITHOUT-BMI-NEXT: notq %rax
+; X64-WITHOUT-BMI-NEXT: retq
+ %and.demorgan = or i64 %b, %a
+ %and3.demorgan = or i64 %and.demorgan, %c
+ %and3 = xor i64 %and3.demorgan, -1
+ ret i64 %and3
+}
>From f237020a2005d5b42cc32e0849eacd5ba806ff2f Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:55:33 +0000
Subject: [PATCH 12/20] [DAG]: Run fmt
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fba8b62f5ca35..fbc63d8eb6d40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10205,18 +10205,20 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue B;
SDValue C;
APInt Cst;
- if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) &&
+ if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))),
+ m_ConstInt(Cst))) &&
Cst.isAllOnes()) {
auto Ty = N->getValueType(0);
- auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
- auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
- auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
+ auto NegA =
+ DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty));
+ auto NegB =
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty));
+ auto NegC =
+ DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty));
- return DAG.getNode(
- ISD::AND, DL, VT,
- NegA,
- DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
+ return DAG.getNode(ISD::AND, DL, VT, NegA,
+ DAG.getNode(ISD::AND, DL, VT, NegB, NegC));
}
}
>From 956b849a31b714a235ad160558f1e3ff69f5f363 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:55:51 +0000
Subject: [PATCH 13/20] [AArch64]: Updated tests
---
llvm/test/CodeGen/AArch64/bsl.ll | 120 ++++++++-------
.../CodeGen/AArch64/build-vector-dup-simd.ll | 24 +--
llvm/test/CodeGen/AArch64/ctlz.ll | 139 +++++++++++-------
llvm/test/CodeGen/AArch64/eon.ll | 22 ++-
.../CodeGen/AArch64/fp16-v4-instructions.ll | 44 ++++--
.../CodeGen/AArch64/fp16-v8-instructions.ll | 50 +++++--
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 36 +++--
7 files changed, 258 insertions(+), 177 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
index df6b6f75b8935..fcf7393d2c801 100644
--- a/llvm/test/CodeGen/AArch64/bsl.ll
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -32,17 +32,19 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
; NEON-LABEL: nbsl_v1i64:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: bic v1.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: bic v0.8b, v0.8b, v1.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v1i64:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b
+; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b
; SVE2-NEXT: ret
%4 = and <1 x i64> %2, %0
%5 = xor <1 x i64> %2, splat (i64 -1)
@@ -78,9 +80,8 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
; NEON-LABEL: bsl2n_v1i64:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.8b, v2.8b, v0.8b
-; NEON-NEXT: orr v1.8b, v2.8b, v1.8b
-; NEON-NEXT: orn v0.8b, v0.8b, v1.8b
+; NEON-NEXT: mvn v1.8b, v1.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: bsl2n_v1i64:
@@ -118,17 +119,19 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NEON-LABEL: nbsl_v2i64:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: bic v1.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: bic v0.16b, v0.16b, v1.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v2i64:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b
+; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b
; SVE2-NEXT: ret
%4 = and <2 x i64> %2, %0
%5 = xor <2 x i64> %2, splat (i64 -1)
@@ -164,9 +167,8 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NEON-LABEL: bsl2n_v2i64:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.16b, v2.16b, v0.16b
-; NEON-NEXT: orr v1.16b, v2.16b, v1.16b
-; NEON-NEXT: orn v0.16b, v0.16b, v1.16b
+; NEON-NEXT: mvn v1.16b, v1.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: bsl2n_v2i64:
@@ -189,17 +191,18 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
; NEON-LABEL: nbsl_v8i8:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
-; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: and v3.8b, v2.8b, v1.8b
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: orn v1.8b, v3.8b, v1.8b
+; NEON-NEXT: bic v0.8b, v1.8b, v0.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v8i8:
; SVE2: // %bb.0:
-; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: and v3.8b, v2.8b, v1.8b
+; SVE2-NEXT: and v0.8b, v2.8b, v0.8b
+; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b
+; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b
; SVE2-NEXT: ret
%4 = and <8 x i8> %2, %0
%5 = xor <8 x i8> %2, splat (i8 -1)
@@ -212,17 +215,18 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
; NEON-LABEL: nbsl_v4i16:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
-; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: and v3.8b, v2.8b, v1.8b
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: orn v1.8b, v3.8b, v1.8b
+; NEON-NEXT: bic v0.8b, v1.8b, v0.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v4i16:
; SVE2: // %bb.0:
-; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: and v3.8b, v2.8b, v1.8b
+; SVE2-NEXT: and v0.8b, v2.8b, v0.8b
+; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b
+; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b
; SVE2-NEXT: ret
%4 = and <4 x i16> %2, %0
%5 = xor <4 x i16> %2, splat (i16 -1)
@@ -235,17 +239,19 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
; NEON-LABEL: nbsl_v2i32:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: bic v1.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v0.8b, v0.8b
+; NEON-NEXT: bic v0.8b, v0.8b, v1.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v2i32:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b
+; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b
; SVE2-NEXT: ret
%4 = and <2 x i32> %2, %0
%5 = xor <2 x i32> %2, splat (i32 -1)
@@ -258,17 +264,18 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; NEON-LABEL: nbsl_v16i8:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
-; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: and v3.16b, v2.16b, v1.16b
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: orn v1.16b, v3.16b, v1.16b
+; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v16i8:
; SVE2: // %bb.0:
-; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: and v3.16b, v2.16b, v1.16b
+; SVE2-NEXT: and v0.16b, v2.16b, v0.16b
+; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b
+; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
; SVE2-NEXT: ret
%4 = and <16 x i8> %2, %0
%5 = xor <16 x i8> %2, splat (i8 -1)
@@ -281,17 +288,18 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
; NEON-LABEL: nbsl_v8i16:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
-; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: and v3.16b, v2.16b, v1.16b
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: orn v1.16b, v3.16b, v1.16b
+; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v8i16:
; SVE2: // %bb.0:
-; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: and v3.16b, v2.16b, v1.16b
+; SVE2-NEXT: and v0.16b, v2.16b, v0.16b
+; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b
+; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
; SVE2-NEXT: ret
%4 = and <8 x i16> %2, %0
%5 = xor <8 x i16> %2, splat (i16 -1)
@@ -304,17 +312,19 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
; NEON-LABEL: nbsl_v4i32:
; NEON: // %bb.0:
-; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: bic v1.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: bic v0.16b, v0.16b, v1.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v4i32:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b
+; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b
; SVE2-NEXT: ret
%4 = and <4 x i32> %2, %0
%5 = xor <4 x i32> %2, splat (i32 -1)
@@ -471,16 +481,14 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
; NEON-LABEL: nor_q:
; NEON: // %bb.0:
-; NEON-NEXT: orr v0.16b, v1.16b, v0.16b
-; NEON-NEXT: mvn v0.16b, v0.16b
+; NEON-NEXT: mvn v1.16b, v1.16b
+; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nor_q:
; SVE2: // %bb.0:
-; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
-; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
-; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: mvn v1.16b, v1.16b
+; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
; SVE2-NEXT: ret
%3 = or <2 x i64> %1, %0
%4 = xor <2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
index ac0b8e89519dd..af7f9b6d471ad 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
@@ -117,10 +117,10 @@ entry:
define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
+; CHECK-NOFULLFP16-NEXT: fcmgt s2, s1, s0
+; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1
+; CHECK-NOFULLFP16-NEXT: mvn v1.8b, v2.8b
+; CHECK-NOFULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-NOFULLFP16-NEXT: ret
;
; CHECK-NONANS-LABEL: dup_v1i32_ueq:
@@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
;
; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
+; CHECK-FULLFP16-NEXT: fcmgt s2, s1, s0
+; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1
+; CHECK-FULLFP16-NEXT: mvn v1.8b, v2.8b
+; CHECK-FULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-FULLFP16-NEXT: ret
entry:
%0 = fcmp ueq float %a, %b
@@ -260,10 +260,10 @@ entry:
define <1 x float> @dup_v1i32_uno(float %a, float %b) {
; CHECK-LABEL: dup_v1i32_uno:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmge s2, s0, s1
-; CHECK-NEXT: fcmgt s0, s1, s0
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: fcmgt s2, s1, s0
+; CHECK-NEXT: fcmge s0, s0, s1
+; CHECK-NEXT: mvn v1.8b, v2.8b
+; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
entry:
%0 = fcmp uno float %a, %b
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index 04124609eec74..f459cc2d78442 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -276,18 +276,23 @@ define <2 x i64> @v2i64(<2 x i64> %d) {
; CHECK-SD-LABEL: v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #1
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #2
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #4
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #8
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #16
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #32
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: orr v2.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-SD-NEXT: ushr v3.2d, v2.2d, #2
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: orr v2.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT: ushr v4.2d, v2.2d, #4
+; CHECK-SD-NEXT: orr v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: ushr v1.2d, v2.2d, #8
+; CHECK-SD-NEXT: orr v2.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ushr v3.2d, v2.2d, #16
+; CHECK-SD-NEXT: orr v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #32
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: cnt v0.16b, v0.16b
; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b
; CHECK-SD-NEXT: uaddlp v0.4s, v0.8h
@@ -314,34 +319,44 @@ define <3 x i64> @v3i64(<3 x i64> %d) {
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: ushr v4.2d, v2.2d, #1
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: orr v6.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: mvn v2.16b, v2.16b
; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #1
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ushr v1.2d, v2.2d, #1
-; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #2
-; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #2
-; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #4
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #4
-; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #8
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #8
-; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #16
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #16
-; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #32
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #32
+; CHECK-SD-NEXT: ushr v7.2d, v6.2d, #2
+; CHECK-SD-NEXT: bic v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: orr v3.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: orr v6.16b, v6.16b, v7.16b
+; CHECK-SD-NEXT: bic v2.16b, v2.16b, v7.16b
+; CHECK-SD-NEXT: ushr v5.2d, v3.2d, #2
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ushr v17.2d, v6.2d, #4
+; CHECK-SD-NEXT: orr v3.16b, v3.16b, v5.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v5.16b
+; CHECK-SD-NEXT: orr v6.16b, v6.16b, v17.16b
+; CHECK-SD-NEXT: bic v2.16b, v2.16b, v17.16b
+; CHECK-SD-NEXT: ushr v16.2d, v3.2d, #4
+; CHECK-SD-NEXT: ushr v4.2d, v6.2d, #8
+; CHECK-SD-NEXT: orr v3.16b, v3.16b, v16.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v16.16b
+; CHECK-SD-NEXT: orr v6.16b, v6.16b, v4.16b
+; CHECK-SD-NEXT: bic v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: ushr v1.2d, v3.2d, #8
+; CHECK-SD-NEXT: orr v3.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ushr v5.2d, v3.2d, #16
+; CHECK-SD-NEXT: orr v1.16b, v3.16b, v5.16b
+; CHECK-SD-NEXT: ushr v3.2d, v6.2d, #16
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v5.16b
+; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #32
+; CHECK-SD-NEXT: orr v4.16b, v6.16b, v3.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: bic v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: ushr v2.2d, v4.2d, #32
; CHECK-SD-NEXT: cnt v0.16b, v0.16b
-; CHECK-SD-NEXT: mvn v1.16b, v1.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: cnt v1.16b, v1.16b
; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b
; CHECK-SD-NEXT: uaddlp v0.4s, v0.8h
@@ -377,30 +392,40 @@ define <4 x i64> @v4i64(<4 x i64> %d) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #1
; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #1
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #2
-; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #2
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #4
-; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #4
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #8
-; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #8
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #16
-; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #16
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #32
-; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #32
-; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: orr v4.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: orr v5.16b, v1.16b, v3.16b
; CHECK-SD-NEXT: mvn v0.16b, v0.16b
; CHECK-SD-NEXT: mvn v1.16b, v1.16b
+; CHECK-SD-NEXT: ushr v6.2d, v4.2d, #2
+; CHECK-SD-NEXT: ushr v7.2d, v5.2d, #2
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: orr v4.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT: orr v5.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v6.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT: ushr v16.2d, v4.2d, #4
+; CHECK-SD-NEXT: ushr v17.2d, v5.2d, #4
+; CHECK-SD-NEXT: orr v4.16b, v4.16b, v16.16b
+; CHECK-SD-NEXT: orr v5.16b, v5.16b, v17.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v16.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v17.16b
+; CHECK-SD-NEXT: ushr v2.2d, v4.2d, #8
+; CHECK-SD-NEXT: ushr v3.2d, v5.2d, #8
+; CHECK-SD-NEXT: orr v4.16b, v4.16b, v2.16b
+; CHECK-SD-NEXT: orr v5.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: ushr v6.2d, v4.2d, #16
+; CHECK-SD-NEXT: ushr v7.2d, v5.2d, #16
+; CHECK-SD-NEXT: orr v2.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT: orr v3.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v6.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT: ushr v2.2d, v2.2d, #32
+; CHECK-SD-NEXT: ushr v3.2d, v3.2d, #32
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT: cnt v0.16b, v0.16b
; CHECK-SD-NEXT: cnt v1.16b, v1.16b
; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b
diff --git a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll
index 8b31cbfe16b1a..ea0e0122d9b6d 100644
--- a/llvm/test/CodeGen/AArch64/eon.ll
+++ b/llvm/test/CodeGen/AArch64/eon.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
; RUN: llc %s -pass-remarks-missed=gisel* -mtriple=aarch64-none-linux-gnu -global-isel -o - 2>&1 | FileCheck %s
@@ -6,8 +7,9 @@
; Check that the eon instruction is generated instead of eor, movn
define i64 @test1(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test1:
-; CHECK: eon
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eon x0, x0, x1, lsl #4
+; CHECK-NEXT: ret
entry:
%shl = shl i64 %b, 4
%neg = xor i64 %a, -1
@@ -18,10 +20,11 @@ entry:
; Same check with multiple uses of %neg
define i64 @test2(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test2:
-; CHECK: eon
-; CHECK: eon
-; CHECK: lsl
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eon x8, x0, x1, lsl #4
+; CHECK-NEXT: eon x9, x2, x1, lsl #4
+; CHECK-NEXT: lsl x0, x8, x9
+; CHECK-NEXT: ret
entry:
%shl = shl i64 %b, 4
%neg = xor i64 %shl, -1
@@ -33,9 +36,6 @@ entry:
; Check that eon is generated if the xor is a disjoint or.
define i64 @disjoint_or(i64 %a, i64 %b) {
-; CHECK-LABEL: disjoint_or:
-; CHECK: eon
-; CHECK: ret
%or = or disjoint i64 %a, %b
%eon = xor i64 %or, -1
ret i64 %eon
@@ -43,10 +43,6 @@ define i64 @disjoint_or(i64 %a, i64 %b) {
; Check that eon is *not* generated if the or is not disjoint.
define i64 @normal_or(i64 %a, i64 %b) {
-; CHECK-LABEL: normal_or:
-; CHECK: orr
-; CHECK: mvn
-; CHECK: ret
%or = or i64 %a, %b
%not = xor i64 %or, -1
ret i64 %not
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 6233ce743b706..529b76cf84906 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: test_fcmp_ueq:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcmgt v2.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h
+; CHECK-FP16-SD-NEXT: fcmgt v0.4h, v0.4h, v1.4h
+; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b
+; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
; CHECK-CVT-GI: // %bb.0:
@@ -581,6 +581,14 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcmgt v2.4h, v0.4h, v1.4h
+; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-FP16-GI-NEXT: ret
%1 = fcmp ueq <4 x half> %a, %b
ret <4 x i1> %1
@@ -714,13 +722,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: test_fcmp_uno:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcmge v2.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: test_fcmp_uno:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h
+; CHECK-FP16-SD-NEXT: fcmge v0.4h, v0.4h, v1.4h
+; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b
+; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_uno:
; CHECK-CVT-GI: // %bb.0:
@@ -732,6 +740,14 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_uno:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcmge v2.4h, v0.4h, v1.4h
+; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-FP16-GI-NEXT: ret
%1 = fcmp uno <4 x half> %a, %b
ret <4 x i1> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 86763eb5f9e3b..6d67fc9ebe1c6 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: test_fcmp_ueq:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcmgt v2.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
-; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h
+; CHECK-FP16-SD-NEXT: fcmgt v0.8h, v0.8h, v1.8h
+; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
; CHECK-CVT-GI: // %bb.0:
@@ -1016,6 +1016,15 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcmgt v2.8h, v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-GI-NEXT: ret
%1 = fcmp ueq <8 x half> %a, %b
ret <8 x i1> %1
}
@@ -1190,14 +1199,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: test_fcmp_uno:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcmge v2.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
-; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: test_fcmp_uno:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h
+; CHECK-FP16-SD-NEXT: fcmge v0.8h, v0.8h, v1.8h
+; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b
+; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_uno:
; CHECK-CVT-GI: // %bb.0:
@@ -1216,6 +1225,15 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test_fcmp_uno:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcmge v2.8h, v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-GI-NEXT: ret
%1 = fcmp uno <8 x half> %a, %b
ret <8 x i1> %1
}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 6cfe66eb8e633..80293388a5cf9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -46,7 +46,9 @@ define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: nbsl_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.b, #127 // =0x7f
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: and z1.b, z1.b, #0x80
+; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT: bic z0.d, z2.d, z1.d
; CHECK-NEXT: ret
%1 = and <vscale x 16 x i8> %a, splat(i8 127)
%2 = and <vscale x 16 x i8> %b, splat(i8 -128)
@@ -59,7 +61,9 @@ define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
; CHECK-LABEL: nbsl_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: and z1.h, z1.h, #0x8000
+; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT: bic z0.d, z2.d, z1.d
; CHECK-NEXT: ret
%1 = and <vscale x 8 x i16> %a, splat(i16 32767)
%2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
@@ -72,7 +76,9 @@ define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; CHECK-LABEL: nbsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT: bic z0.d, z2.d, z1.d
; CHECK-NEXT: ret
%1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
%2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
@@ -85,7 +91,9 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
; CHECK-LABEL: nbsl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
+; CHECK-NEXT: bic z0.d, z2.d, z1.d
; CHECK-NEXT: ret
%1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
%2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
@@ -115,7 +123,9 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_nbsl_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%4 = and <vscale x 16 x i8> %2, %0
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -165,7 +175,9 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_nbsl_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%4 = and <vscale x 8 x i16> %2, %0
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -215,7 +227,9 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_nbsl_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%4 = and <vscale x 4 x i32> %2, %0
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -265,7 +279,9 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_nbsl_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
+; CHECK-NEXT: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%4 = and <vscale x 2 x i64> %2, %0
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -341,7 +357,9 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: nor:
; CHECK: // %bb.0:
-; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z2.d
+; CHECK-NEXT: bic z0.d, z1.d, z0.d
; CHECK-NEXT: ret
%3 = or <vscale x 2 x i64> %1, %0
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
>From 643e4d53a6884f33b960c1fca389422999611d01 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:56:14 +0000
Subject: [PATCH 14/20] [X86]: Updated tests
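Regenerates the X86 CodeGen checks affected by the ANDN DeMorgan fold and
removes the standalone bmi-reassoc-demorgan.ll file. A minimal sketch of the
scalar pattern the regenerated checks cover (function name hypothetical;
mirrors the coverage of the deleted file):

  define i32 @not_of_or(i32 %a, i32 %b) {
    %t = or i32 %b, %a
    %r = xor i32 %t, -1   ; not (or a, b)
    ret i32 %r            ; with +bmi this selects notl + andnl
  }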
---
llvm/test/CodeGen/X86/abds-vector-128.ll | 6 +-
.../test/CodeGen/X86/avx512-mask-bit-manip.ll | 25 +-
llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 174 ------------
llvm/test/CodeGen/X86/bool-ext-inc.ll | 7 +-
llvm/test/CodeGen/X86/combine-or.ll | 39 ++-
llvm/test/CodeGen/X86/combine-srl.ll | 6 +-
.../CodeGen/X86/expand-vp-int-intrinsics.ll | 6 +-
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll | 105 +++----
llvm/test/CodeGen/X86/icmp-pow2-diff.ll | 54 ++--
llvm/test/CodeGen/X86/ispow2.ll | 24 +-
llvm/test/CodeGen/X86/machine-cp.ll | 67 ++---
llvm/test/CodeGen/X86/mul-cmp.ll | 16 +-
llvm/test/CodeGen/X86/promote-cmp.ll | 34 +--
llvm/test/CodeGen/X86/sat-add.ll | 10 +-
llvm/test/CodeGen/X86/setcc-combine.ll | 6 +-
llvm/test/CodeGen/X86/setcc-logic.ll | 7 +-
.../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 16 +-
llvm/test/CodeGen/X86/sshl_sat_vec.ll | 6 +-
...-masked-merge-vector-variablemask-const.ll | 15 +-
.../X86/urem-seteq-vec-tautological.ll | 12 +-
llvm/test/CodeGen/X86/vec_cmp_sint-128.ll | 48 ++--
llvm/test/CodeGen/X86/vec_cmp_uint-128.ll | 48 ++--
llvm/test/CodeGen/X86/vec_compare.ll | 24 +-
llvm/test/CodeGen/X86/vec_ctbits.ll | 12 +-
llvm/test/CodeGen/X86/vec_setcc-2.ll | 13 +-
llvm/test/CodeGen/X86/vector-lzcnt-128.ll | 96 +++----
llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 266 +++++++++---------
llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll | 6 +-
llvm/test/CodeGen/X86/vector-popcnt-128.ll | 10 +-
llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 24 +-
llvm/test/CodeGen/X86/vsplit-and.ll | 22 +-
31 files changed, 534 insertions(+), 670 deletions(-)
delete mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
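Note on the vector churn below: wherever the old checks materialized
not(or(a, b)) as an or followed by an xor with an all-ones register, the
regenerated output pre-inverts one operand and finishes with an andn-style
instruction (pandn/vpandn/andnps). An illustrative SSE2 shape, adapted from
the hunks that follow:

  ; before:
  ;   por     %xmm1, %xmm0   ; a | b
  ;   pcmpeqd %xmm1, %xmm1   ; all-ones
  ;   pxor    %xmm1, %xmm0   ; ~(a | b)
  ; after:
  ;   pcmpeqd %xmm2, %xmm2   ; all-ones
  ;   pxor    %xmm1, %xmm2   ; ~b
  ;   pandn   %xmm2, %xmm0   ; ~a & ~b == ~(a | b)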
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index 148be83892b72..bc57a31f063b5 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: paddq %xmm4, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
index 3fcfb9d278da7..37df42ea2682d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
@@ -714,18 +714,19 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-NEXT: vpmovmskb %ymm4, %ecx
; AVX512F-NEXT: shlq $32, %rcx
; AVX512F-NEXT: leaq (%rax,%rcx), %rdx
-; AVX512F-NEXT: addq %rcx, %rax
-; AVX512F-NEXT: addq $-1, %rax
-; AVX512F-NEXT: andnq %rax, %rdx, %rax
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: shrq $32, %rax
-; AVX512F-NEXT: shrq $48, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: kmovw %edx, %k2
-; AVX512F-NEXT: kmovw %ecx, %k3
-; AVX512F-NEXT: kmovw %eax, %k4
+; AVX512F-NEXT: addq $-1, %rdx
+; AVX512F-NEXT: notq %rcx
+; AVX512F-NEXT: andnq %rcx, %rax, %rax
+; AVX512F-NEXT: andq %rax, %rdx
+; AVX512F-NEXT: movq %rdx, %rax
+; AVX512F-NEXT: movl %edx, %ecx
+; AVX512F-NEXT: kmovw %edx, %k1
+; AVX512F-NEXT: shrq $32, %rdx
+; AVX512F-NEXT: shrq $48, %rax
+; AVX512F-NEXT: shrl $16, %ecx
+; AVX512F-NEXT: kmovw %ecx, %k2
+; AVX512F-NEXT: kmovw %eax, %k3
+; AVX512F-NEXT: kmovw %edx, %k4
; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
deleted file mode 100644
index 7f3a376b24b2a..0000000000000
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
-
-define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: notl %ecx
-; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notl %edi
-; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT: notl %eax
-; X64-WITHOUT-BMI-NEXT: retq
- %temp = or i32 %b, %a
- %res = xor i32 %temp, -1
- ret i32 %res
-}
-
-define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: notl %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx
-; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notl %edi
-; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %edx, %eax
-; X64-WITHOUT-BMI-NEXT: notl %eax
-; X64-WITHOUT-BMI-NEXT: retq
- %and.demorgan = or i32 %b, %a
- %and3.demorgan = or i32 %and.demorgan, %c
- %and3 = xor i32 %and3.demorgan, -1
- ret i32 %and3
-}
-
-define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: pushl %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT: notl %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT: notl %esi
-; X86-WITH-BMI-NEXT: andnl %esi, %ecx, %edx
-; X86-WITH-BMI-NEXT: popl %esi
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notq %rdi
-; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: notl %edx
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT: notq %rax
-; X64-WITHOUT-BMI-NEXT: retq
- %temp = or i64 %b, %a
- %res = xor i64 %temp, -1
- ret i64 %res
-}
-
-define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: pushl %ebx
-; X86-WITH-BMI-NEXT: pushl %edi
-; X86-WITH-BMI-NEXT: pushl %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-WITH-BMI-NEXT: notl %edi
-; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT: notl %ebx
-; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx
-; X86-WITH-BMI-NEXT: popl %esi
-; X86-WITH-BMI-NEXT: popl %edi
-; X86-WITH-BMI-NEXT: popl %ebx
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notq %rdi
-; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: notl %edx
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax
-; X64-WITHOUT-BMI-NEXT: notq %rax
-; X64-WITHOUT-BMI-NEXT: retq
- %and.demorgan = or i64 %b, %a
- %and3.demorgan = or i64 %and.demorgan, %c
- %and3 = xor i64 %and3.demorgan, -1
- ret i64 %and3
-}
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index 088b0ce857f20..d89893f94bdae 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -88,8 +88,11 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp1 = icmp ne <4 x i32> %a, %b
%cmp2 = icmp ne <4 x i32> %c, %d
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8c91274abf3dd..8d5bbb4ae8e1e 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -183,14 +183,32 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
}
define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
-; CHECK-LABEL: or_build_pair_not:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: shlq $32, %rsi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: retq
+; SSE-LABEL: or_build_pair_not:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: shlq $32, %rsi
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: orq %rsi, %rax
+; SSE-NEXT: notq %rax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: or_build_pair_not:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX1-NEXT: shlq $32, %rsi
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: orq %rsi, %rax
+; AVX1-NEXT: notq %rax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: or_build_pair_not:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: shlq $32, %rsi
+; AVX2-NEXT: notq %rsi
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: andnq %rsi, %rax, %rax
+; AVX2-NEXT: retq
%n0 = xor i32 %a0, -1
%n1 = xor i32 %a1, -1
%x0 = zext i32 %n0 to i64
@@ -262,10 +280,9 @@ define i64 @PR89533(<64 x i8> %a0) {
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: shlq $32, %rcx
-; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: notq %rcx
-; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: tzcntq %rcx, %rax
+; AVX2-NEXT: andnq %rcx, %rax, %rax
+; AVX2-NEXT: tzcntq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 7bc90534dcc6e..4e31177023b08 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -440,9 +440,9 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index dbfa69d497698..7919495821efd 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -1490,9 +1490,9 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 23dcf334124c0..f59e53687ff74 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -640,8 +640,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
@@ -650,17 +650,18 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; SSE41-LABEL: ne_and_to_abs_vec4x64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT: pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT: pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
; SSE41-NEXT: packssdw %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm4, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x64:
@@ -681,8 +682,9 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: andps %xmm4, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
%cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
@@ -706,51 +708,51 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT: pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT: pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
; SSE41-NEXT: packssdw %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm4, %xmm0
-; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm2
+; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: xorps %xmm3, %xmm2
+; SSE2-NEXT: andnps %xmm2, %xmm0
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
@@ -868,8 +870,9 @@ define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -909,8 +912,9 @@ define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1031,8 +1035,8 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -1042,21 +1046,22 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: pmovsxbd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
%cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
@@ -1087,8 +1092,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1097,8 +1102,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1107,8 +1113,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
%cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
index dada1726be424..3fc2a323b5dc1 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
@@ -151,7 +151,7 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm2 & (xmm0 ^ xmm1)
; AVX512-NEXT: retq
;
; AVX2-LABEL: andnot_ne_v8i16_todo_no_splat:
@@ -159,18 +159,19 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; SSE-LABEL: andnot_ne_v8i16_todo_no_splat:
; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqw %xmm1, %xmm2
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
%cmp1 = icmp ne <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%cmp2 = icmp ne <8 x i16> %x, <i16 -16385, i16 -257, i16 -33, i16 -8193, i16 -16385, i16 -257, i16 -33, i16 -8193>
@@ -184,7 +185,7 @@ define <8 x i1> @andnot_ne_v8i16(<8 x i16> %x) nounwind {
; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: andnot_ne_v8i16:
@@ -215,28 +216,29 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
; AVX512-LABEL: andnot_ne_v16i8_fail_max_not_n1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: andnot_ne_v16i8_fail_max_not_n1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX2-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE-LABEL: andnot_ne_v16i8_fail_max_not_n1:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqb %xmm1, %xmm2
-; SSE-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
%cmp1 = icmp ne <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%cmp2 = icmp ne <16 x i8> %x, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
@@ -250,7 +252,7 @@ define <16 x i1> @andnot_ne_v16i8(<16 x i8> %x) nounwind {
; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
; AVX512-NEXT: retq
;
; AVX2-LABEL: andnot_ne_v16i8:
@@ -309,7 +311,7 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $86, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm1 & (xmm0 ^ xmm2)
; AVX512-NEXT: retq
;
; AVX2-LABEL: addand_ne_v8i16_fail:
@@ -317,8 +319,8 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: addand_ne_v8i16_fail:
@@ -327,8 +329,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: addand_ne_v8i16_fail:
@@ -337,8 +340,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <8 x i16> %x, <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>
%cmp2 = icmp ne <8 x i16> %x, <i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381, i16 16381>
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index badfd1af940ca..478d80e9827a5 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -179,19 +179,23 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm6
; CHECK-NOBMI-NEXT: pcmpgtd %xmm4, %xmm6
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NOBMI-NEXT: pand %xmm6, %xmm1
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4
+; CHECK-NOBMI-NEXT: pandn %xmm4, %xmm1
; CHECK-NOBMI-NEXT: pxor %xmm5, %xmm3
; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm5
-; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm5
-; CHECK-NOBMI-NEXT: movdqa %xmm5, %xmm7
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
-; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
+; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm4
; CHECK-NOBMI-NEXT: pcmpeqd %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; CHECK-NOBMI-NEXT: andps %xmm7, %xmm0
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
-; CHECK-NOBMI-NEXT: orps %xmm5, %xmm0
-; CHECK-NOBMI-NEXT: xorps %xmm2, %xmm0
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm3
+; CHECK-NOBMI-NEXT: pandn %xmm3, %xmm0
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; CHECK-NOBMI-NEXT: retq
;
; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll
index c84a1159ad56a..0713f0bbe244c 100644
--- a/llvm/test/CodeGen/X86/machine-cp.ll
+++ b/llvm/test/CodeGen/X86/machine-cp.ll
@@ -100,55 +100,38 @@ define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: xorps %xmm5, %xmm5
-; CHECK-NEXT: cvttps2dq %xmm3, %xmm8
+; CHECK-NEXT: cvttps2dq %xmm3, %xmm6
; CHECK-NEXT: movaps %xmm3, %xmm4
; CHECK-NEXT: cmpltps %xmm5, %xmm4
-; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16]
-; CHECK-NEXT: movaps %xmm4, %xmm6
-; CHECK-NEXT: orps %xmm7, %xmm6
-; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3
-; CHECK-NEXT: andps %xmm7, %xmm3
-; CHECK-NEXT: andps %xmm6, %xmm3
-; CHECK-NEXT: andnps %xmm4, %xmm6
-; CHECK-NEXT: cvttps2dq %xmm2, %xmm4
+; CHECK-NEXT: cvttps2dq %xmm2, %xmm3
; CHECK-NEXT: movaps %xmm2, %xmm7
; CHECK-NEXT: cmpltps %xmm5, %xmm7
-; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12]
-; CHECK-NEXT: movaps %xmm7, %xmm9
-; CHECK-NEXT: orps %xmm8, %xmm9
-; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2
-; CHECK-NEXT: andps %xmm8, %xmm2
-; CHECK-NEXT: andps %xmm9, %xmm2
-; CHECK-NEXT: andnps %xmm7, %xmm9
-; CHECK-NEXT: cvttps2dq %xmm1, %xmm4
-; CHECK-NEXT: cmpltps %xmm5, %xmm1
-; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8]
+; CHECK-NEXT: cvttps2dq %xmm1, %xmm2
; CHECK-NEXT: movaps %xmm1, %xmm8
-; CHECK-NEXT: orps %xmm7, %xmm8
-; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4
-; CHECK-NEXT: andps %xmm7, %xmm4
-; CHECK-NEXT: andps %xmm8, %xmm4
-; CHECK-NEXT: andnps %xmm1, %xmm8
+; CHECK-NEXT: cmpltps %xmm5, %xmm8
; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK-NEXT: cmpltps %xmm5, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm9
+; CHECK-NEXT: cmpltps %xmm5, %xmm9
; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4]
-; CHECK-NEXT: movaps %xmm0, %xmm7
-; CHECK-NEXT: orps %xmm5, %xmm7
-; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT: andps %xmm5, %xmm1
-; CHECK-NEXT: andps %xmm7, %xmm1
-; CHECK-NEXT: andnps %xmm0, %xmm7
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-NEXT: andps %xmm0, %xmm7
-; CHECK-NEXT: orps %xmm7, %xmm1
-; CHECK-NEXT: andps %xmm0, %xmm8
-; CHECK-NEXT: orps %xmm8, %xmm4
-; CHECK-NEXT: andps %xmm0, %xmm9
-; CHECK-NEXT: orps %xmm9, %xmm2
-; CHECK-NEXT: andps %xmm0, %xmm6
-; CHECK-NEXT: orps %xmm6, %xmm3
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm4, %xmm1
+; CHECK-NEXT: orps %xmm5, %xmm9
+; CHECK-NEXT: movaps {{.*#+}} xmm10 = [5,6,7,8]
+; CHECK-NEXT: orps %xmm10, %xmm8
+; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
+; CHECK-NEXT: orps %xmm11, %xmm7
+; CHECK-NEXT: movaps {{.*#+}} xmm12 = [13,14,15,16]
+; CHECK-NEXT: orps %xmm12, %xmm4
+; CHECK-NEXT: cvtdq2ps %xmm1, %xmm0
+; CHECK-NEXT: cvtdq2ps %xmm2, %xmm1
+; CHECK-NEXT: cvtdq2ps %xmm3, %xmm2
+; CHECK-NEXT: cvtdq2ps %xmm6, %xmm3
+; CHECK-NEXT: andps %xmm5, %xmm0
+; CHECK-NEXT: andps %xmm9, %xmm0
+; CHECK-NEXT: andps %xmm10, %xmm1
+; CHECK-NEXT: andps %xmm8, %xmm1
+; CHECK-NEXT: andps %xmm11, %xmm2
+; CHECK-NEXT: andps %xmm7, %xmm2
+; CHECK-NEXT: andps %xmm12, %xmm3
+; CHECK-NEXT: andps %xmm4, %xmm3
; CHECK-NEXT: retq
bb:
%v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/mul-cmp.ll b/llvm/test/CodeGen/X86/mul-cmp.ll
index 0ee4601acf694..4fffb42bdc672 100644
--- a/llvm/test/CodeGen/X86/mul-cmp.ll
+++ b/llvm/test/CodeGen/X86/mul-cmp.ll
@@ -119,21 +119,21 @@ define <4 x i1> @mul_nsw_ne0_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: mul_nsw_ne0_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpeqd %xmm2, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_nsw_ne0_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%m = mul nsw <4 x i32> %x, %y
%r = icmp ne <4 x i32> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index 88934a382bbfa..aeb8fe93930a0 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -8,34 +8,36 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE2-LABEL: PR45808:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: pxor %xmm4, %xmm5
-; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
-; SSE2-NEXT: movdqa %xmm6, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pxor %xmm4, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-NEXT: movdqa %xmm2, %xmm8
; SSE2-NEXT: pxor %xmm4, %xmm8
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm9
; SSE2-NEXT: pcmpgtd %xmm8, %xmm9
; SSE2-NEXT: movdqa %xmm9, %xmm10
-; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2]
-; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3]
; SSE2-NEXT: andps %xmm10, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3]
-; SSE2-NEXT: orps %xmm4, %xmm9
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE2-NEXT: pxor %xmm9, %xmm4
-; SSE2-NEXT: pxor %xmm5, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE2-NEXT: pxor %xmm9, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3]
+; SSE2-NEXT: pandn %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm7, %xmm2
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index b12be7cb129d3..37bc8ded142c1 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -1004,9 +1004,10 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
; SSE2-NEXT: pxor %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -1147,9 +1148,10 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddq %xmm1, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index f526db00df606..d97e603c636af 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: retq
%na = xor <2 x i64> %a, <i64 -1, i64 -1>
%nb = xor <2 x i64> %b, <i64 -1, i64 -1>
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index c98aae7fbf405..4b1225c7ac1d8 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -541,9 +541,10 @@ define <4 x i32> @and_icmps_const_1bit_diff_vec(<4 x i32> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [44,60,44,60]
; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp ne <4 x i32> %x, <i32 44, i32 60, i32 44, i32 60>
%b = icmp ne <4 x i32> %x, <i32 60, i32 44, i32 60, i32 44>
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 2d0778853fecd..aad6abfa78c23 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2401,16 +2401,16 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm3
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
+; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; CHECK-AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
-; CHECK-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; CHECK-AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: pr51133:
@@ -2450,10 +2450,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
; CHECK-AVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; CHECK-AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
-; CHECK-AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
-; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: pr51133:
diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 10dee14bdd1a0..82c157c207375 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -37,9 +37,9 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pxor %xmm5, %xmm5
; X64-NEXT: pcmpgtd %xmm4, %xmm5
-; X64-NEXT: por %xmm2, %xmm5
-; X64-NEXT: pcmpeqd %xmm2, %xmm2
-; X64-NEXT: pxor %xmm5, %xmm2
+; X64-NEXT: pcmpeqd %xmm4, %xmm4
+; X64-NEXT: pxor %xmm5, %xmm4
+; X64-NEXT: pandn %xmm4, %xmm2
; X64-NEXT: por %xmm0, %xmm2
; X64-NEXT: pandn %xmm2, %xmm1
; X64-NEXT: por %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index 58fd6492f2ed5..00d122838dbc5 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -127,14 +127,21 @@ define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
-; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
+; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, ptr%px, align 16
%y = load <4 x i32>, ptr%py, align 16
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 84856aab85079..6e68b37bec98a 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: por %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pxor %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm1
; CHECK-SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; CHECK-SSE2-NEXT: retq
;
@@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: por %xmm1, %xmm0
-; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE41-NEXT: pxor %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE41-NEXT: pxor %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pandn %xmm2, %xmm1
; CHECK-SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; CHECK-SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
index ac4b25be5eb65..25ba593d47062 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: ne_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512-LABEL: ne_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512-LABEL: ne_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: ne_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: ge_v2i64:
@@ -349,7 +349,7 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: ge_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -388,7 +388,7 @@ define <4 x i32> @ge_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512-LABEL: ge_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -427,7 +427,7 @@ define <8 x i16> @ge_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512-LABEL: ge_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -466,7 +466,7 @@ define <16 x i8> @ge_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: ge_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: le_v2i64:
@@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: le_v2i64:
@@ -657,7 +657,7 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: le_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -696,7 +696,7 @@ define <4 x i32> @le_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512-LABEL: le_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -735,7 +735,7 @@ define <8 x i16> @le_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512-LABEL: le_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -774,7 +774,7 @@ define <16 x i8> @le_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: le_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
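
The repeated SSE2/SSE41 churn in these regenerated checks is the same rewrite each time: the old sequence computed ~(a | b) by ORing and then XORing against an all-ones register, while the new codegen applies De Morgan and finishes with pandn, which inverts its destination register (the second operand in AT&T syntax) before ANDing. The vpternlogq lines only change spelling: $15 is the NOT truth table, and the regenerated checks simply capture the asm printer's decoded comment. A minimal C++ sketch of the identity and of pandn's operand order (the helper name is illustrative only):

#include <cassert>
#include <cstdint>

// pandn %src, %dst in AT&T syntax computes dst = ~dst & src.
static uint64_t pandn(uint64_t dst, uint64_t src) { return ~dst & src; }

int main() {
  for (uint64_t a : {0x0ULL, 0xF0F0ULL, ~0ULL})
    for (uint64_t b : {0x1ULL, 0xFF00ULL, ~0ULL})
      // De Morgan: ~(a | b) == ~a & ~b == pandn(a, ~b).
      assert(~(a | b) == pandn(a, ~b));
  return 0;
}
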
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index 9a0756edbce32..bd730e7dbefbc 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: ne_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512-LABEL: ne_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512-LABEL: ne_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: ne_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: ge_v2i64:
@@ -535,7 +535,7 @@ define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm1
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -594,7 +594,7 @@ define <4 x i32> @gt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -653,7 +653,7 @@ define <8 x i16> @gt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -696,7 +696,7 @@ define <16 x i8> @gt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: le_v2i64:
@@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: le_v2i64:
@@ -960,7 +960,7 @@ define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1020,7 +1020,7 @@ define <4 x i32> @lt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1080,7 +1080,7 @@ define <8 x i16> @lt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -1123,7 +1123,7 @@ define <16 x i8> @lt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index c1045c7b72f2c..0fc298a2b4cd4 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: retl
%C = icmp sge <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: retl
%C = icmp sle <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: retl
%C = icmp uge <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: retl
%C = icmp ule <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
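
The vec_ctbits and vector-lzcnt diffs below show the same rewrite landing inside the bit-smearing ctlz expansion: the cascade ORs the highest set bit downward, inverts, and popcounts, and its final ~(x | (x >> k)) step is exactly what now lowers to pxor + pandn. A minimal scalar sketch of that expansion, assuming a 32-bit lane and using the GCC/Clang builtin popcount in place of the vector popcount sequence:

#include <cassert>
#include <cstdint>

static int ctlz32(uint32_t x) {
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x = ~(x | (x >> 16)); // the step now emitted as ~(x >> 16) & ~x via pandn
  return __builtin_popcount(x);
}

int main() {
  assert(ctlz32(1u << 20) == 11); // bit 20 set -> 11 leading zeros
  assert(ctlz32(0) == 32);        // zero smears to zero; NOT gives 32 ones
  return 0;
}
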
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 370f88d644b57..048117dd43e66 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -52,9 +52,9 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $32, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -154,9 +154,9 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $16, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index 5a71878ea4579..ade6b5c8d6bdf 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -448,13 +448,14 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
-; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64_splat:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index cfb5fac2fd7aa..990113b1ecc1e 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -33,9 +33,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $32, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -73,9 +73,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $32, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -311,9 +311,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $32, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -351,9 +351,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $32, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -586,9 +586,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -628,9 +628,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -835,9 +835,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -877,9 +877,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1081,9 +1081,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1117,9 +1117,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1289,9 +1289,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1325,9 +1325,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1498,9 +1498,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1531,9 +1531,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE3-NEXT: pand %xmm2, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE3-NEXT: pxor %xmm1, %xmm3
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1661,9 +1661,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1694,9 +1694,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE3-NEXT: pand %xmm2, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE3-NEXT: pxor %xmm1, %xmm3
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
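
In the 512-bit lzcnt tests below the fold goes further: instead of a separate NOT, the regenerated code folds the last OR of the smear cascade and the inversion into one vpternlog (e.g. zmm3 = ~zmm0 & (zmm3 ^ zmm1)), and all-ones constants are now materialized with a ternlog as well (printed as zmm = -1, immediate 0xff). For reference, the 8-bit immediate is a three-input truth table; a small C++ sketch of how one is derived (the helper name is made up):

#include <cstdint>
#include <cstdio>

// Bit i of the immediate is f(A, B, C) evaluated at the input triple spelled
// by i, with the first source operand A as the most significant selector bit.
static uint8_t ternlogImm(bool (*f)(bool, bool, bool)) {
  uint8_t imm = 0;
  for (int i = 0; i < 8; ++i)
    if (f(i & 4, i & 2, i & 1))
      imm |= uint8_t(1u << i);
  return imm;
}

int main() {
  // ~A, ignoring B and C: 0xf, i.e. the old "vpternlogq $15" spelling.
  printf("%#x\n", (unsigned)ternlogImm([](bool a, bool, bool) { return !a; }));
  // A & ~(B | C), as in the new "zmm2 = zmm2 & ~(zmm0 | zmm1)" checks: 0x10.
  printf("%#x\n", (unsigned)ternlogImm(
                      [](bool a, bool b, bool c) { return a && !(b || c); }));
  return 0;
}
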
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index d35a365508d54..8c24aa50a626e 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -28,17 +28,17 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -56,28 +56,30 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpandn %ymm1, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6
+; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
%out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
ret <8 x i64> %out
@@ -107,17 +109,17 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -135,28 +137,30 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpandn %ymm1, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6
+; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
%out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
ret <8 x i64> %out
@@ -184,17 +188,17 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
@@ -214,34 +218,35 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpandn %ymm0, %ymm1, %ymm2
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm1 = ~zmm1
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm4
-; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
-; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm6, %ymm6
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpackuswb %ymm6, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpand %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
@@ -271,17 +276,17 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
@@ -301,34 +306,35 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpandn %ymm0, %ymm1, %ymm2
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm1 = ~zmm1
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm4
-; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
-; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm6, %ymm6
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpackuswb %ymm6, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpand %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5]
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
index a1b277efde6ff..1473da6aac5ea 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
@@ -20,9 +20,9 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $16, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index c1d30b6d5a995..d8e955c93581e 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) {
; SSE-NEXT: pcmpgtd %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE-NEXT: pand %xmm4, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: ne_1_v2i64:
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index 97124f0a9d8d9..55f2258aad018 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v2i64:
@@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uge_v2i64:
@@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64:
@@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ule_v2i64:
diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 833db0efbda89..90bbde645cd08 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -7,9 +7,9 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: pcmpeqq %xmm2, %xmm0
; CHECK-NEXT: pcmpeqq %xmm2, %xmm1
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, (%rdi)
; CHECK-NEXT: retq
%cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
@@ -32,19 +32,19 @@ define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: pxor %xmm4, %xmm4
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm5, %xmm5
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
-; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
+; CHECK-NEXT: packssdw %xmm0, %xmm2
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT: por %xmm0, %xmm3
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
; CHECK-NEXT: packssdw %xmm3, %xmm1
-; CHECK-NEXT: pxor %xmm5, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2]
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,2,2,2]
; CHECK-NEXT: pslld $31, %xmm0
; CHECK-NEXT: psrad $31, %xmm0
-; CHECK-NEXT: pmovsxdq %xmm1, %xmm1
+; CHECK-NEXT: pmovsxdq %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, (%rdi)
; CHECK-NEXT: movq %xmm0, 16(%rdi)
; CHECK-NEXT: retq
>From e2032efe50c3c421c98575d822442dfde65dab71 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 06:56:43 +0000
Subject: [PATCH 15/20] [PowerPC]: Updated tests
---
llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll | 1 +
.../CodeGen/PowerPC/fp-strict-fcmp-spe.ll | 24 ++--
.../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll | 19 ++-
llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll | 65 +++++++++
.../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll | 19 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 19 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 15 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 15 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll | 19 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-nor.ll | 125 ++++++++++++++----
.../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 19 ++-
11 files changed, 268 insertions(+), 72 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index bea24ee98336d..ed8dc504f026a 100644
--- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \
; RUN: grep eqv | count 3
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
index c20d319f2ac79..78644691fb646 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
@@ -113,14 +113,12 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; SPE-LABEL: test_f32_ueq_s:
; SPE: # %bb.0:
-; SPE-NEXT: efscmplt cr0, r5, r6
-; SPE-NEXT: bc 12, gt, .LBB7_3
-; SPE-NEXT: # %bb.1:
; SPE-NEXT: efscmpgt cr0, r5, r6
-; SPE-NEXT: bc 12, gt, .LBB7_3
-; SPE-NEXT: # %bb.2:
-; SPE-NEXT: mr r4, r3
-; SPE-NEXT: .LBB7_3:
+; SPE-NEXT: bc 12, gt, .LBB7_2
+; SPE-NEXT: # %bb.1:
+; SPE-NEXT: efscmplt cr0, r5, r6
+; SPE-NEXT: bclr 4, gt, 0
+; SPE-NEXT: .LBB7_2:
; SPE-NEXT: mr r3, r4
; SPE-NEXT: blr
%cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
@@ -355,14 +353,12 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r5, r5, r6
-; SPE-NEXT: efdcmplt cr0, r5, r7
-; SPE-NEXT: bc 12, gt, .LBB21_3
-; SPE-NEXT: # %bb.1:
; SPE-NEXT: efdcmpgt cr0, r5, r7
-; SPE-NEXT: bc 12, gt, .LBB21_3
-; SPE-NEXT: # %bb.2:
-; SPE-NEXT: mr r4, r3
-; SPE-NEXT: .LBB21_3:
+; SPE-NEXT: bc 12, gt, .LBB21_2
+; SPE-NEXT: # %bb.1:
+; SPE-NEXT: efdcmplt cr0, r5, r7
+; SPE-NEXT: bclr 4, gt, 0
+; SPE-NEXT: .LBB21_2:
; SPE-NEXT: mr r3, r4
; SPE-NEXT: blr
%cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
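
In the SPE ueq tests the regenerated code drops a basic block: the greater-than compare is tested first, and when less-than is also false the function returns %a straight through bclr instead of funnelling both register copies through a third block. A hedged C++ sketch of the selected logic (ueq holds exactly when neither ordered less-than nor ordered greater-than does; the function name is made up):

#include <cstdio>

static int select_ueq(float f1, float f2, int a, int b) {
  if (f1 > f2) return b;    // efscmpgt + bc 12, gt
  if (!(f1 < f2)) return a; // efscmplt + bclr 4, gt: early return to caller
  return b;                 // fallthrough block: mr r3, r4
}

int main() {
  printf("%d %d\n", select_ueq(1.0f, 1.0f, 7, 9),  // equal -> 7
         select_ueq(1.0f, 2.0f, 7, 9));            // less-than -> 9
  return 0;
}
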
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index c23daac80279b..872a08c20eae8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -1,29 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; Check the miscellaneous logical vector operations added in P8
-;
+;
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; Test x eqv y
define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_veqv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: veqv 2, 2, 3
+; CHECK-NEXT: blr
%tmp = xor <4 x i32> %x, %y
%ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %ret_val
-; CHECK: veqv 2, 2, 3
}
; Test x vnand y
define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_vnand:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vnand 2, 2, 3
+; CHECK-NEXT: blr
%tmp = and <4 x i32> %x, %y
%ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %ret_val
-; CHECK: vnand 2, 2, 3
}
; Test x vorc y and variants
define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: test_vorc:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vor 2, 3, 2
+; CHECK-NEXT: blr
%tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = or <4 x i32> %x, %tmp1
-; CHECK: vorc 3, 2, 3
%tmp3 = xor <4 x i32> %tmp2, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp4 = or <4 x i32> %tmp3, %x
-; CHECK: vorc 2, 2, 3
ret <4 x i32> %tmp4
}
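
The test_vorc checks shrinking to a single vor is not a lost pattern: the chain folds away algebraically, since ~(x | ~y) | x == (~x & y) | x == x | y. A quick sanity check of that identity (sketch only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 0x0f0fu, 0xffffffffu})
    for (uint32_t y : {0u, 0x3cu, 0xffffffffu})
      assert((~(x | ~y) | x) == (x | y));
  return 0;
}
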
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
index ba74df956e71e..7f7a52fe7de65 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -6,6 +7,10 @@
; CHECK: xxlandc v2, v2, v3
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_not:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxlandc v2, v2, v3
+; CHECK-NEXT: blr
entry:
%neg = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
%and = and <4 x i32> %neg, %A
@@ -17,6 +22,10 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %A
%and1 = and <16 x i8> %and, %C
@@ -28,6 +37,10 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %A
%and1 = and <8 x i16> %and, %C
@@ -39,6 +52,10 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %A
%and1 = and <4 x i32> %and, %C
@@ -50,6 +67,10 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_and64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
+; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %A
%and1 = and <2 x i64> %and, %C
@@ -61,6 +82,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 14
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_nand:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 14
+; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %C, %B
%neg = xor <4 x i32> %and, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -73,6 +98,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 7
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_or:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 7
+; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%and = and <4 x i32> %or, %A
@@ -84,6 +113,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 8
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_nor:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 8
+; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%neg = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -96,6 +129,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 6
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_xor:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 6
+; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%and = and <4 x i32> %xor, %A
@@ -107,6 +144,10 @@ entry:
; CHECK: xxeval v2, v2, v3, v4, 9
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: and_eqv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 9
+; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg = xor <4 x i32> %xor, %C
@@ -119,6 +160,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 241
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_nand:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 241
+; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %C, %B
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -131,6 +176,10 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 254
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_and:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v3, v2, v4, 254
+; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %A
%and1 = and <4 x i32> %and, %C
@@ -143,6 +192,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 249
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_xor:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 249
+; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%and = and <4 x i32> %xor, %A
@@ -155,6 +208,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 246
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_eqv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 246
+; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -167,6 +224,10 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 248
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_or:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v4, v3, 248
+; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%and = and <4 x i32> %or, %A
@@ -179,6 +240,10 @@ entry:
; CHECK: xxeval v2, v2, v3, v4, 247
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
+; CHECK-LABEL: nand_nor:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 247
+; CHECK-NEXT: blr
entry:
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
%or = or <4 x i32> %A.not, %B
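A side note on reading these checks, since the xxeval immediates look opaque: the 8-bit immediate behaves as a truth table over the three source operands, where big-endian bit k gives the result for the input combination k = (A<<2) | (B<<1) | C. This is inferred from the CHECK lines above rather than quoted from the ISA manual; a standalone C++ sketch (not LLVM code):

#include <cstdio>

template <typename F> static unsigned ternaryImm(F Fn) {
  unsigned Imm = 0;
  for (unsigned K = 0; K < 8; ++K) {
    bool A = (K >> 2) & 1, B = (K >> 1) & 1, C = K & 1;
    if (Fn(A, B, C))
      Imm |= 0x80u >> K; // big-endian (PowerPC-style) bit numbering
  }
  return Imm;
}

int main() {
  // Prints 7, matching "xxeval v2, v2, v4, v3, 7" for and_or: A & (B | C).
  printf("%u\n", ternaryImm([](bool A, bool B, bool C) { return A && (B || C); }));
  // Prints 8, matching and_nor: A & ~(B | C).
  printf("%u\n", ternaryImm([](bool A, bool B, bool C) { return A && !(B || C); }));
  // Prints 14, matching and_nand: A & ~(B & C).
  printf("%u\n", ternaryImm([](bool A, bool B, bool C) { return A && !(B && C); }));
}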
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
index 6616a1e6e7e9f..ba5c9edb3897d 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
@@ -32,7 +32,10 @@ entry:
define dso_local <8 x i16> @eqvA_B_C(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
; CHECK-LABEL: eqvA_B_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 150
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxland vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %C
@@ -48,7 +51,8 @@ entry:
define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
; CHECK-LABEL: norA_andB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 224
+; CHECK-NEXT: xxlnor vs0, v2, v2
+; CHECK-NEXT: xxeval v2, vs0, v3, v4, 14
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -100,7 +104,8 @@ entry:
define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
; CHECK-LABEL: norA_xorB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 144
+; CHECK-NEXT: xxlnor vs0, v2, v2
+; CHECK-NEXT: xxeval v2, vs0, v3, v4, 9
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -113,7 +118,9 @@ entry:
define dso_local <4 x i32> @norA_B_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
; CHECK-LABEL: norA_B_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 128
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: xxlnor vs1, v3, v3
+; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 16
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -164,7 +171,9 @@ entry:
define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
; CHECK-LABEL: orA_norB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 143
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: xxlnor vs1, v3, v3
+; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 31
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
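Under that truth-table reading (again an inference, not ISA text), the immediate pairs in this file's hunks relate mechanically to the inserted xxlnor instructions: complementing the first operand swaps the two 4-bit halves of the immediate (224 <-> 14 for norA_andB_C, 144 <-> 9 for norA_xorB_C), and complementing the second and third operands reverses the bits inside each half (128 <-> 16 for norA_B_C, 143 <-> 31 for orA_norB_C). A small C++ check with illustrative helper names:

#include <cassert>

// Complement operand A: swap the A=0 and A=1 halves of the truth table.
static unsigned notA(unsigned Imm) { return ((Imm & 0x0F) << 4) | (Imm >> 4); }

// Complement operands B and C: reverse the four bits inside each half.
static unsigned notBC(unsigned Imm) {
  unsigned R = 0;
  for (unsigned K = 0; K < 8; ++K)
    if (Imm & (0x80u >> K))
      R |= 0x80u >> (K ^ 3); // flip the B and C index bits
  return R;
}

int main() {
  assert(notA(224) == 14 && notA(144) == 9);
  assert(notBC(128) == 16 && notBC(143) == 31);
  return 0;
}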
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index b41220b01373a..f98edc21bf2ea 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -80,9 +80,11 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -97,10 +99,13 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -115,9 +120,12 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxland vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -132,9 +140,12 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxland vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index a51e392279d55..0baa420b79761 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_nor_BC_B_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_nor_BC_B_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 54bf6c03f8c1a..6fc822d729457 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_nor_BC_C_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_nor_BC_C_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
index ba7680b27cc17..78ae36cc0ecf7 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
@@ -84,9 +84,11 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -102,10 +104,13 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxleqv vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -121,9 +126,12 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxleqv vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -139,9 +147,12 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxleqv vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
index 369587454a7c1..90928e668afd8 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
@@ -15,9 +15,11 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -32,10 +34,13 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -50,9 +55,12 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxland vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -67,9 +75,12 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxland vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %C
@@ -85,8 +96,9 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT: xxsel v2, vs0, v3, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -100,10 +112,12 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_B_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT: xxsel v2, vs0, v3, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -117,9 +131,11 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_B_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT: xxsel v2, vs0, v3, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -133,9 +149,11 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_B_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
+; CHECK-NEXT: xxsel v2, vs0, v3, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
@@ -150,8 +168,9 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT: xxsel v2, vs0, v4, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -165,10 +184,12 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_C_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT: xxsel v2, vs0, v4, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -182,9 +203,11 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_C_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT: xxsel v2, vs0, v4, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -198,9 +221,11 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_C_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
+; CHECK-NEXT: xxsel v2, vs0, v4, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
@@ -214,9 +239,11 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -231,10 +258,13 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -249,9 +279,12 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlxor vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <16 x i8> %B, %C
@@ -266,9 +299,12 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlxor vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <8 x i16> %B, %C
@@ -283,9 +319,11 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -300,10 +338,13 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -318,9 +359,12 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxlnor vs1, v4, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT: xxsel v2, vs0, vs1, v2
; CHECK-NEXT: blr
entry:
%not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation
@@ -335,9 +379,12 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation
@@ -352,9 +399,11 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v3
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -369,10 +418,13 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnor vs0, v3, v3
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
@@ -387,9 +439,12 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxlnor vs1, v3, v3
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT: xxsel v2, vs0, vs1, v2
; CHECK-NEXT: blr
entry:
%not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation
@@ -404,9 +459,12 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnor vs0, v3, v3
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation
@@ -421,9 +479,11 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -439,10 +499,13 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -458,9 +521,12 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnand vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -476,9 +542,12 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs1, vs1, vs1
+; CHECK-NEXT: xxlnand vs0, v3, v4
+; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 0fc296cc5a4e2..5031ebc930e11 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -267,9 +267,11 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
+; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
+; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -284,10 +286,13 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -302,9 +307,12 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxlxor vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -319,9 +327,12 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
+; CHECK-NEXT: xxleqv vs0, vs0, vs0
+; CHECK-NEXT: xxlxor vs1, v3, v4
+; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
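Many of the regenerated vselect checks above trade one fused xxeval (immediates 24, 56, 88, 104, ...) for an explicit two-value select. A minimal C++ model of the split form, assuming xxsel XT, XA, XB, XC computes (XA & ~XC) | (XB & XC), i.e. takes XB where the mask XC is set and XA elsewhere; a sketch, not the compiler's implementation:

#include <cassert>
#include <cstdint>

// Per-bit select: mask bit set -> XB, mask bit clear -> XA.
static uint64_t xxsel(uint64_t XA, uint64_t XB, uint64_t XC) {
  return (XA & ~XC) | (XB & XC);
}

int main() {
  uint64_t B = 0x00FF00FF00FF00FFULL, C = 0x0F0F0F0F0F0F0F0FULL;
  uint64_t A = 0xFFFF0000FFFF0000ULL; // sign-extended i1 lanes after vslw/vsraw
  // ternary_A_nor_BC_xor_BC: A ? ~(B | C) : (B ^ C)
  uint64_t Fused = (~(B | C) & A) | ((B ^ C) & ~A);
  assert(xxsel(B ^ C, ~(B | C), A) == Fused);
  return 0;
}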
>From 52185882fc9f24090c54db0649e4121320592de1 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 07:33:47 +0000
Subject: [PATCH 16/20] [X86]: Removed the reversing of the demorgan rewrite
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e870514db2443 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55615,10 +55615,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
// Folds for better commutativity:
if (N1->hasOneUse()) {
+ /*
// ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
if (SDValue Not = IsNOT(N1, DAG))
return DAG.getNOT(
DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
+ */
// ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x))
// Zero out elements by setting the PSHUFB mask value to 0xFF.
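The fold commented out in this hunk is the De Morgan direction ANDNP(x, NOT(y)) -> NOT(OR(x, y)), presumably disabled so it no longer rewrites the ANDN form produced by the new xor combine. A quick self-contained check of the identity; the andnp helper below is a stand-in for X86ISD::ANDNP semantics, not an LLVM API:

#include <cassert>
#include <cstdint>

// Stand-in for X86ISD::ANDNP: ~X & Y.
static uint64_t andnp(uint64_t X, uint64_t Y) { return ~X & Y; }

int main() {
  const uint64_t Vals[] = {0x0ULL, 0xF0F0F0F0F0F0F0F0ULL, 0x123456789ABCDEF0ULL, ~0ULL};
  for (uint64_t X : Vals)
    for (uint64_t Y : Vals)
      assert(andnp(X, ~Y) == ~(X | Y)); // De Morgan: ~X & ~Y == ~(X | Y)
  return 0;
}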
>From de10f4a6eeae333b8e0972ff499f09015b72c203 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sat, 18 Oct 2025 07:34:18 +0000
Subject: [PATCH 17/20] [X86][PowerPC][AArch64]: Updated tests
---
llvm/test/CodeGen/AArch64/bsl.ll | 120 ++++----
.../CodeGen/AArch64/build-vector-dup-simd.ll | 24 +-
.../CodeGen/AArch64/fp16-v4-instructions.ll | 44 +--
.../CodeGen/AArch64/fp16-v8-instructions.ll | 50 +---
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 36 +--
.../CodeGen/PowerPC/fp-strict-fcmp-spe.ll | 24 +-
.../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll | 3 +-
.../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll | 10 +-
.../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 19 +-
.../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 15 +-
.../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 15 +-
.../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll | 19 +-
.../CodeGen/PowerPC/xxeval-vselect-x-nor.ll | 125 ++------
.../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 19 +-
llvm/test/CodeGen/X86/abds-vector-128.ll | 6 +-
.../test/CodeGen/X86/avx512-mask-bit-manip.ll | 25 +-
llvm/test/CodeGen/X86/combine-or.ll | 39 +--
llvm/test/CodeGen/X86/combine-srl.ll | 9 +-
.../CodeGen/X86/expand-vp-int-intrinsics.ll | 9 +-
llvm/test/CodeGen/X86/ispow2.ll | 24 +-
llvm/test/CodeGen/X86/machine-cp.ll | 67 +++--
llvm/test/CodeGen/X86/promote-cmp.ll | 34 +--
llvm/test/CodeGen/X86/setcc-combine.ll | 6 +-
.../X86/urem-seteq-vec-tautological.ll | 12 +-
llvm/test/CodeGen/X86/vec_cmp_sint-128.ll | 24 +-
llvm/test/CodeGen/X86/vec_cmp_uint-128.ll | 24 +-
llvm/test/CodeGen/X86/vec_compare.ll | 24 +-
llvm/test/CodeGen/X86/vec_ctbits.ll | 18 +-
llvm/test/CodeGen/X86/vec_setcc-2.ll | 13 +-
llvm/test/CodeGen/X86/vector-lzcnt-128.ll | 248 ++++++++--------
llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 276 +++++++++---------
llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll | 9 +-
llvm/test/CodeGen/X86/vector-popcnt-128.ll | 10 +-
llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 24 +-
34 files changed, 627 insertions(+), 797 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll
index fcf7393d2c801..df6b6f75b8935 100644
--- a/llvm/test/CodeGen/AArch64/bsl.ll
+++ b/llvm/test/CodeGen/AArch64/bsl.ll
@@ -32,19 +32,17 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
; NEON-LABEL: nbsl_v1i64:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.8b, v2.8b, v0.8b
-; NEON-NEXT: bic v1.8b, v1.8b, v2.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v0.8b, v0.8b
-; NEON-NEXT: bic v0.8b, v0.8b, v1.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v1i64:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b
-; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
%4 = and <1 x i64> %2, %0
%5 = xor <1 x i64> %2, splat (i64 -1)
@@ -80,8 +78,9 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
; NEON-LABEL: bsl2n_v1i64:
; NEON: // %bb.0:
-; NEON-NEXT: mvn v1.8b, v1.8b
-; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: and v0.8b, v2.8b, v0.8b
+; NEON-NEXT: orr v1.8b, v2.8b, v1.8b
+; NEON-NEXT: orn v0.8b, v0.8b, v1.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: bsl2n_v1i64:
@@ -119,19 +118,17 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NEON-LABEL: nbsl_v2i64:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.16b, v2.16b, v0.16b
-; NEON-NEXT: bic v1.16b, v1.16b, v2.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v0.16b, v0.16b
-; NEON-NEXT: bic v0.16b, v0.16b, v1.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v2i64:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b
-; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
%4 = and <2 x i64> %2, %0
%5 = xor <2 x i64> %2, splat (i64 -1)
@@ -167,8 +164,9 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
; NEON-LABEL: bsl2n_v2i64:
; NEON: // %bb.0:
-; NEON-NEXT: mvn v1.16b, v1.16b
-; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: and v0.16b, v2.16b, v0.16b
+; NEON-NEXT: orr v1.16b, v2.16b, v1.16b
+; NEON-NEXT: orn v0.16b, v0.16b, v1.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: bsl2n_v2i64:
@@ -191,18 +189,17 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
; NEON-LABEL: nbsl_v8i8:
; NEON: // %bb.0:
-; NEON-NEXT: and v3.8b, v2.8b, v1.8b
-; NEON-NEXT: and v0.8b, v2.8b, v0.8b
-; NEON-NEXT: orn v1.8b, v3.8b, v1.8b
-; NEON-NEXT: bic v0.8b, v1.8b, v0.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v8i8:
; SVE2: // %bb.0:
-; SVE2-NEXT: and v3.8b, v2.8b, v1.8b
-; SVE2-NEXT: and v0.8b, v2.8b, v0.8b
-; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b
-; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
%4 = and <8 x i8> %2, %0
%5 = xor <8 x i8> %2, splat (i8 -1)
@@ -215,18 +212,17 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
; NEON-LABEL: nbsl_v4i16:
; NEON: // %bb.0:
-; NEON-NEXT: and v3.8b, v2.8b, v1.8b
-; NEON-NEXT: and v0.8b, v2.8b, v0.8b
-; NEON-NEXT: orn v1.8b, v3.8b, v1.8b
-; NEON-NEXT: bic v0.8b, v1.8b, v0.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
+; NEON-NEXT: mvn v0.8b, v0.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v4i16:
; SVE2: // %bb.0:
-; SVE2-NEXT: and v3.8b, v2.8b, v1.8b
-; SVE2-NEXT: and v0.8b, v2.8b, v0.8b
-; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b
-; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
%4 = and <4 x i16> %2, %0
%5 = xor <4 x i16> %2, splat (i16 -1)
@@ -239,19 +235,17 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
; NEON-LABEL: nbsl_v2i32:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.8b, v2.8b, v0.8b
-; NEON-NEXT: bic v1.8b, v1.8b, v2.8b
+; NEON-NEXT: bif v0.8b, v1.8b, v2.8b
; NEON-NEXT: mvn v0.8b, v0.8b
-; NEON-NEXT: bic v0.8b, v0.8b, v1.8b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v2i32:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
-; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b
-; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b
+; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
%4 = and <2 x i32> %2, %0
%5 = xor <2 x i32> %2, splat (i32 -1)
@@ -264,18 +258,17 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; NEON-LABEL: nbsl_v16i8:
; NEON: // %bb.0:
-; NEON-NEXT: and v3.16b, v2.16b, v1.16b
-; NEON-NEXT: and v0.16b, v2.16b, v0.16b
-; NEON-NEXT: orn v1.16b, v3.16b, v1.16b
-; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v16i8:
; SVE2: // %bb.0:
-; SVE2-NEXT: and v3.16b, v2.16b, v1.16b
-; SVE2-NEXT: and v0.16b, v2.16b, v0.16b
-; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b
-; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
%4 = and <16 x i8> %2, %0
%5 = xor <16 x i8> %2, splat (i8 -1)
@@ -288,18 +281,17 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
; NEON-LABEL: nbsl_v8i16:
; NEON: // %bb.0:
-; NEON-NEXT: and v3.16b, v2.16b, v1.16b
-; NEON-NEXT: and v0.16b, v2.16b, v0.16b
-; NEON-NEXT: orn v1.16b, v3.16b, v1.16b
-; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v8i16:
; SVE2: // %bb.0:
-; SVE2-NEXT: and v3.16b, v2.16b, v1.16b
-; SVE2-NEXT: and v0.16b, v2.16b, v0.16b
-; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b
-; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
%4 = and <8 x i16> %2, %0
%5 = xor <8 x i16> %2, splat (i16 -1)
@@ -312,19 +304,17 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
; NEON-LABEL: nbsl_v4i32:
; NEON: // %bb.0:
-; NEON-NEXT: and v0.16b, v2.16b, v0.16b
-; NEON-NEXT: bic v1.16b, v1.16b, v2.16b
+; NEON-NEXT: bif v0.16b, v1.16b, v2.16b
; NEON-NEXT: mvn v0.16b, v0.16b
-; NEON-NEXT: bic v0.16b, v0.16b, v1.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nbsl_v4i32:
; SVE2: // %bb.0:
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2
-; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b
-; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
%4 = and <4 x i32> %2, %0
%5 = xor <4 x i32> %2, splat (i32 -1)
@@ -481,14 +471,16 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
; NEON-LABEL: nor_q:
; NEON: // %bb.0:
-; NEON-NEXT: mvn v1.16b, v1.16b
-; NEON-NEXT: bic v0.16b, v1.16b, v0.16b
+; NEON-NEXT: orr v0.16b, v1.16b, v0.16b
+; NEON-NEXT: mvn v0.16b, v0.16b
; NEON-NEXT: ret
;
; SVE2-LABEL: nor_q:
; SVE2: // %bb.0:
-; SVE2-NEXT: mvn v1.16b, v1.16b
-; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
%3 = or <2 x i64> %1, %0
%4 = xor <2 x i64> %3, splat (i64 -1)
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
index af7f9b6d471ad..ac0b8e89519dd 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
@@ -117,10 +117,10 @@ entry:
define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s2, s1, s0
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1
-; CHECK-NOFULLFP16-NEXT: mvn v1.8b, v2.8b
-; CHECK-NOFULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
+; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
+; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
; CHECK-NOFULLFP16-NEXT: ret
;
; CHECK-NONANS-LABEL: dup_v1i32_ueq:
@@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
;
; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s2, s1, s0
-; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1
-; CHECK-FULLFP16-NEXT: mvn v1.8b, v2.8b
-; CHECK-FULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
+; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
+; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FULLFP16-NEXT: ret
entry:
%0 = fcmp ueq float %a, %b
@@ -260,10 +260,10 @@ entry:
define <1 x float> @dup_v1i32_uno(float %a, float %b) {
; CHECK-LABEL: dup_v1i32_uno:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmgt s2, s1, s0
-; CHECK-NEXT: fcmge s0, s0, s1
-; CHECK-NEXT: mvn v1.8b, v2.8b
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: fcmge s2, s0, s1
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.8b, v0.8b
; CHECK-NEXT: ret
entry:
%0 = fcmp uno float %a, %b
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 529b76cf84906..6233ce743b706 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h
-; CHECK-FP16-SD-NEXT: fcmgt v0.4h, v0.4h, v1.4h
-; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b
-; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmgt v2.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
+; CHECK-FP16-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
; CHECK-CVT-GI: // %bb.0:
@@ -581,14 +581,6 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: fcmgt v2.4h, v0.4h, v1.4h
-; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-FP16-GI-NEXT: ret
%1 = fcmp ueq <4 x half> %a, %b
ret <4 x i1> %1
@@ -722,13 +714,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: test_fcmp_uno:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h
-; CHECK-FP16-SD-NEXT: fcmge v0.4h, v0.4h, v1.4h
-; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b
-; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: test_fcmp_uno:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmge v2.4h, v0.4h, v1.4h
+; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
+; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
+; CHECK-FP16-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_uno:
; CHECK-CVT-GI: // %bb.0:
@@ -740,14 +732,6 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_uno:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: fcmge v2.4h, v0.4h, v1.4h
-; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h
-; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-FP16-GI-NEXT: ret
%1 = fcmp uno <4 x half> %a, %b
ret <4 x i1> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 6d67fc9ebe1c6..86763eb5f9e3b 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: test_fcmp_ueq:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h
-; CHECK-FP16-SD-NEXT: fcmgt v0.8h, v0.8h, v1.8h
-; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmgt v2.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
+; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
; CHECK-CVT-GI: // %bb.0:
@@ -1016,15 +1016,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_ueq:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: fcmgt v2.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-GI-NEXT: ret
%1 = fcmp ueq <8 x half> %a, %b
ret <8 x i1> %1
}
@@ -1199,14 +1190,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: test_fcmp_uno:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h
-; CHECK-FP16-SD-NEXT: fcmge v0.8h, v0.8h, v1.8h
-; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b
-; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: test_fcmp_uno:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcmge v2.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
+; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
+; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
+; CHECK-FP16-NEXT: ret
;
; CHECK-CVT-GI-LABEL: test_fcmp_uno:
; CHECK-CVT-GI: // %bb.0:
@@ -1225,15 +1216,6 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: test_fcmp_uno:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: fcmge v2.8h, v0.8h, v1.8h
-; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h
-; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h
-; CHECK-FP16-GI-NEXT: ret
%1 = fcmp uno <8 x half> %a, %b
ret <8 x i1> %1
}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 80293388a5cf9..6cfe66eb8e633 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -46,9 +46,7 @@ define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: nbsl_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.b, #127 // =0x7f
-; CHECK-NEXT: and z1.b, z1.b, #0x80
-; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT: bic z0.d, z2.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%1 = and <vscale x 16 x i8> %a, splat(i8 127)
%2 = and <vscale x 16 x i8> %b, splat(i8 -128)
@@ -61,9 +59,7 @@ define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
; CHECK-LABEL: nbsl_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT: bic z0.d, z2.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%1 = and <vscale x 8 x i16> %a, splat(i16 32767)
%2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
@@ -76,9 +72,7 @@ define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; CHECK-LABEL: nbsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT: bic z0.d, z2.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
%2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
@@ -91,9 +85,7 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
; CHECK-LABEL: nbsl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
-; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d
-; CHECK-NEXT: bic z0.d, z2.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
%2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
@@ -123,9 +115,7 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_nbsl_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%4 = and <vscale x 16 x i8> %2, %0
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -175,9 +165,7 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_nbsl_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%4 = and <vscale x 8 x i16> %2, %0
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -227,9 +215,7 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_nbsl_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%4 = and <vscale x 4 x i32> %2, %0
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -279,9 +265,7 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_nbsl_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%4 = and <vscale x 2 x i64> %2, %0
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -357,9 +341,7 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: nor:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z1.d, z1.d, z2.d
-; CHECK-NEXT: bic z0.d, z1.d, z0.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
; CHECK-NEXT: ret
%3 = or <vscale x 2 x i64> %1, %0
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
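These sve2-bsl updates fold the surrounding bic/mvn sequences into single nbsl instructions. A minimal C++ model, assuming SVE2 nbsl zd, zd, zm, zk computes ~((zd & zk) | (zm & ~zk)), an inverted bitwise select matching the IR in the codegen_nbsl_* tests; reusing the accumulator as the mask, as in the nor test above, yields a vector NOR:

#include <cassert>
#include <cstdint>

// Inverted bitwise select: ~((Zd & Zk) | (Zm & ~Zk)).
static uint64_t nbsl(uint64_t Zd, uint64_t Zm, uint64_t Zk) {
  return ~((Zd & Zk) | (Zm & ~Zk));
}

int main() {
  uint64_t A = 0x123456789ABCDEF0ULL, B = 0x0FF00FF00FF00FF0ULL;
  // "nbsl z0.d, z0.d, z1.d, z0.d" from the nor test: ~(A | B).
  assert(nbsl(A, B, A) == ~(A | B));
  // "nbsl z0.d, z0.d, z1.d, z2.d": ~((A & C) | (B & ~C)), as in the IR.
  uint64_t C = 0xFFFF0000FFFF0000ULL;
  assert(nbsl(A, B, C) == ~((A & C) | (B & ~C)));
  return 0;
}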
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
index 78644691fb646..c20d319f2ac79 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll
@@ -113,12 +113,14 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; SPE-LABEL: test_f32_ueq_s:
; SPE: # %bb.0:
-; SPE-NEXT: efscmpgt cr0, r5, r6
-; SPE-NEXT: bc 12, gt, .LBB7_2
-; SPE-NEXT: # %bb.1:
; SPE-NEXT: efscmplt cr0, r5, r6
-; SPE-NEXT: bclr 4, gt, 0
-; SPE-NEXT: .LBB7_2:
+; SPE-NEXT: bc 12, gt, .LBB7_3
+; SPE-NEXT: # %bb.1:
+; SPE-NEXT: efscmpgt cr0, r5, r6
+; SPE-NEXT: bc 12, gt, .LBB7_3
+; SPE-NEXT: # %bb.2:
+; SPE-NEXT: mr r4, r3
+; SPE-NEXT: .LBB7_3:
; SPE-NEXT: mr r3, r4
; SPE-NEXT: blr
%cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
@@ -353,12 +355,14 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; SPE: # %bb.0:
; SPE-NEXT: evmergelo r7, r7, r8
; SPE-NEXT: evmergelo r5, r5, r6
-; SPE-NEXT: efdcmpgt cr0, r5, r7
-; SPE-NEXT: bc 12, gt, .LBB21_2
-; SPE-NEXT: # %bb.1:
; SPE-NEXT: efdcmplt cr0, r5, r7
-; SPE-NEXT: bclr 4, gt, 0
-; SPE-NEXT: .LBB21_2:
+; SPE-NEXT: bc 12, gt, .LBB21_3
+; SPE-NEXT: # %bb.1:
+; SPE-NEXT: efdcmpgt cr0, r5, r7
+; SPE-NEXT: bc 12, gt, .LBB21_3
+; SPE-NEXT: # %bb.2:
+; SPE-NEXT: mr r4, r3
+; SPE-NEXT: .LBB21_3:
; SPE-NEXT: mr r3, r4
; SPE-NEXT: blr
%cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index 872a08c20eae8..310f0a66aa9b9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -28,7 +28,8 @@ define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: test_vorc:
; CHECK: # %bb.0:
-; CHECK-NEXT: vor 2, 3, 2
+; CHECK-NEXT: vorc 3, 2, 3
+; CHECK-NEXT: vorc 2, 2, 3
; CHECK-NEXT: blr
%tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = or <4 x i32> %x, %tmp1
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
index ba5c9edb3897d..e391228fc95a9 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll
@@ -51,8 +51,7 @@ entry:
define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
; CHECK-LABEL: norA_andB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v2, v2
-; CHECK-NEXT: xxeval v2, vs0, v3, v4, 14
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 224
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -104,8 +103,7 @@ entry:
define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
; CHECK-LABEL: norA_xorB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v2, v2
-; CHECK-NEXT: xxeval v2, vs0, v3, v4, 9
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 144
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -171,9 +169,7 @@ entry:
define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
; CHECK-LABEL: orA_norB_C:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxlnor vs1, v3, v3
-; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 31
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 143
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
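A decoding aid for the xxeval immediates here and in the vselect tests that follow: the 8-bit immediate is a truth table over (A, B, C), and every updated check in this patch is consistent with bit (7 - (4a + 2b + c)) of the immediate holding the result for inputs a, b, c. A small illustrative helper (hypothetical, not code from this patch):

  #include <cstdint>

  // Build an xxeval-style immediate from a ternary boolean function:
  // bit (7 - (4a + 2b + c)) of the result holds f(a, b, c).
  uint8_t xxevalImm(bool (*f)(bool, bool, bool)) {
    uint8_t Imm = 0;
    for (int a = 0; a < 2; ++a)
      for (int b = 0; b < 2; ++b)
        for (int c = 0; c < 2; ++c)
          if (f(a, b, c))
            Imm |= uint8_t(1) << (7 - (4 * a + 2 * b + c));
    return Imm;
  }

For example, xxevalImm([](bool a, bool b, bool c) { return !(a || (b && c)); }) yields 224, matching norA_andB_C above; the same scheme reproduces 144 for norA_xorB_C and 143 for orA_norB_C.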
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index f98edc21bf2ea..b41220b01373a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -80,11 +80,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -99,13 +97,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -120,12 +115,9 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -140,12 +132,9 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
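Under the same convention, the ternary_A_* immediates decompose by nibble: A = 0 covers table indices 0-3 (the high four bits) and A = 1 covers indices 4-7 (the low four), so for select(A, f(B,C), g(B,C)) the high nibble is g's two-variable truth table and the low nibble is f's. Worked example for the checks above: 24 = 0b0001'1000; the high nibble 0001 is and(B,C), true only at B = C = 1 (index 3, bit 4), and the low nibble 1000 is nor(B,C), true only at B = C = 0 (index 4, bit 3).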
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index 0baa420b79761..a51e392279d55 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_nor_BC_B_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_nor_BC_B_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 6fc822d729457..54bf6c03f8c1a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_nor_BC_C_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_nor_BC_C_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
index 78ae36cc0ecf7..ba7680b27cc17 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll
@@ -84,11 +84,9 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -104,13 +102,10 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxleqv vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -126,12 +121,9 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxleqv vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -147,12 +139,9 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxleqv vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 152
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
index 90928e668afd8..369587454a7c1 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -34,13 +32,10 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -55,12 +50,9 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxland vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -75,12 +67,9 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxland vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 129
; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %C
@@ -96,9 +85,8 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -112,12 +100,10 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_B_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -131,11 +117,9 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_B_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -149,11 +133,9 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_B_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 131
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
@@ -168,9 +150,8 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -184,12 +165,10 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_C_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -203,11 +182,9 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
; CHECK-LABEL: ternary_A_C_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -221,11 +198,9 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
; CHECK-LABEL: ternary_A_C_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 133
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
@@ -239,11 +214,9 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -258,13 +231,10 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -279,12 +249,9 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlxor vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
; CHECK-NEXT: blr
entry:
%xor = xor <16 x i8> %B, %C
@@ -299,12 +266,9 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlxor vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 134
; CHECK-NEXT: blr
entry:
%xor = xor <8 x i16> %B, %C
@@ -319,11 +283,9 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -338,13 +300,10 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -359,12 +318,9 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxlnor vs1, v4, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, vs1, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
; CHECK-NEXT: blr
entry:
%not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation
@@ -379,12 +335,9 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 138
; CHECK-NEXT: blr
entry:
%not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation
@@ -399,11 +352,9 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -418,13 +369,10 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnor vs0, v3, v3
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
@@ -439,12 +387,9 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxlnor vs1, v3, v3
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, vs1, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
; CHECK-NEXT: blr
entry:
%not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation
@@ -459,12 +404,9 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1
; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 140
; CHECK-NEXT: blr
entry:
%not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation
@@ -479,11 +421,9 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -499,13 +439,10 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -521,12 +458,9 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnand vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %C
@@ -542,12 +476,9 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x
; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs1, vs1, vs1
-; CHECK-NEXT: xxlnand vs0, v3, v4
-; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 142
; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 5031ebc930e11..0fc296cc5a4e2 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -267,11 +267,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -286,13 +284,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -307,12 +302,9 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v5, 7
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxlxor vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslb v2, v2, v5
; CHECK-NEXT: vsrab v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <16 x i8> %B, %C
@@ -327,12 +319,9 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltiw v5, 983055
-; CHECK-NEXT: xxleqv vs0, vs0, vs0
-; CHECK-NEXT: xxlxor vs1, v3, v4
-; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96
; CHECK-NEXT: vslh v2, v2, v5
; CHECK-NEXT: vsrah v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <8 x i16> %B, %C
diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll
index bc57a31f063b5..148be83892b72 100644
--- a/llvm/test/CodeGen/X86/abds-vector-128.ll
+++ b/llvm/test/CodeGen/X86/abds-vector-128.ll
@@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: paddq %xmm4, %xmm0
; SSE2-NEXT: retq
;
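This is the first of many identical SSE2 tail rewrites in the patch: the old tail materialized all-ones with pcmpeqd and computed ~x & ~y via pxor + pandn, while the new tail computes the equivalent ~(x | y) via por + pxor. A minimal equivalence sketch (illustrative C++ only):

  #include <cstdint>

  // Old tail: pcmpeqd (all-ones), pxor (~y), pandn (~x & ~y).
  uint32_t old_tail(uint32_t x, uint32_t y) { return ~x & ~y; }

  // New tail: por (x | y), pcmpeqd, pxor. Same value by De Morgan.
  uint32_t new_tail(uint32_t x, uint32_t y) { return ~(x | y); }

Both tails are three instructions; the new form simply moves the NOT after the OR.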
diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
index 37df42ea2682d..3fcfb9d278da7 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll
@@ -714,19 +714,18 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-NEXT: vpmovmskb %ymm4, %ecx
; AVX512F-NEXT: shlq $32, %rcx
; AVX512F-NEXT: leaq (%rax,%rcx), %rdx
-; AVX512F-NEXT: addq $-1, %rdx
-; AVX512F-NEXT: notq %rcx
-; AVX512F-NEXT: andnq %rcx, %rax, %rax
-; AVX512F-NEXT: andq %rax, %rdx
-; AVX512F-NEXT: movq %rdx, %rax
-; AVX512F-NEXT: movl %edx, %ecx
-; AVX512F-NEXT: kmovw %edx, %k1
-; AVX512F-NEXT: shrq $32, %rdx
-; AVX512F-NEXT: shrq $48, %rax
-; AVX512F-NEXT: shrl $16, %ecx
-; AVX512F-NEXT: kmovw %ecx, %k2
-; AVX512F-NEXT: kmovw %eax, %k3
-; AVX512F-NEXT: kmovw %edx, %k4
+; AVX512F-NEXT: addq %rcx, %rax
+; AVX512F-NEXT: addq $-1, %rax
+; AVX512F-NEXT: andnq %rax, %rdx, %rax
+; AVX512F-NEXT: movq %rax, %rcx
+; AVX512F-NEXT: movl %eax, %edx
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: shrq $32, %rax
+; AVX512F-NEXT: shrq $48, %rcx
+; AVX512F-NEXT: shrl $16, %edx
+; AVX512F-NEXT: kmovw %edx, %k2
+; AVX512F-NEXT: kmovw %ecx, %k3
+; AVX512F-NEXT: kmovw %eax, %k4
; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
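The tzmsk rewrite above folds the old not + andn + and chain into a single ANDN. tzmsk is the mask of the trailing zero bits, ~x & (x - 1); and because the two pmovmskb halves occupy disjoint bit ranges after the shlq $32, rax + rcx equals rax | rcx, so one sum can serve as both the OR result and the ADD operand. A hedged sketch:

  #include <cstdint>

  // Mask of the trailing zero bits of x: ~x & (x - 1).
  // With BMI, ANDN evaluates ~src1 & src2, so this is one add plus one andn.
  uint64_t tzmsk(uint64_t x) { return ~x & (x - 1); }
  // e.g. x = 0b0110'0000 -> tzmsk(x) = 0b0001'1111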
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8d5bbb4ae8e1e..8c91274abf3dd 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -183,32 +183,14 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
}
define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
-; SSE-LABEL: or_build_pair_not:
-; SSE: # %bb.0:
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: shlq $32, %rsi
-; SSE-NEXT: movl %edi, %eax
-; SSE-NEXT: orq %rsi, %rax
-; SSE-NEXT: notq %rax
-; SSE-NEXT: retq
-;
-; AVX1-LABEL: or_build_pair_not:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX1-NEXT: shlq $32, %rsi
-; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: orq %rsi, %rax
-; AVX1-NEXT: notq %rax
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: or_build_pair_not:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: shlq $32, %rsi
-; AVX2-NEXT: notq %rsi
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andnq %rsi, %rax, %rax
-; AVX2-NEXT: retq
+; CHECK-LABEL: or_build_pair_not:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: shlq $32, %rsi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: notq %rax
+; CHECK-NEXT: retq
%n0 = xor i32 %a0, -1
%n1 = xor i32 %a1, -1
%x0 = zext i32 %n0 to i64
@@ -280,9 +262,10 @@ define i64 @PR89533(<64 x i8> %a0) {
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: shlq $32, %rcx
+; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: notq %rcx
-; AVX2-NEXT: andnq %rcx, %rax, %rax
-; AVX2-NEXT: tzcntq %rax, %rax
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: tzcntq %rcx, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 4e31177023b08..21657bf67f233 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -437,12 +437,13 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index 7919495821efd..905d1648564fb 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -1487,12 +1487,13 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $8, %xmm1
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pandn %xmm2, %xmm3
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: pandn %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 478d80e9827a5..badfd1af940ca 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -179,23 +179,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm6
; CHECK-NOBMI-NEXT: pcmpgtd %xmm4, %xmm6
-; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
-; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-NOBMI-NEXT: pand %xmm6, %xmm1
-; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
-; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4
-; CHECK-NOBMI-NEXT: pandn %xmm4, %xmm1
; CHECK-NOBMI-NEXT: pxor %xmm5, %xmm3
; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
-; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm5
+; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NOBMI-NEXT: movdqa %xmm5, %xmm7
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
; CHECK-NOBMI-NEXT: pcmpeqd %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
-; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
-; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm3
-; CHECK-NOBMI-NEXT: pandn %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; CHECK-NOBMI-NEXT: andps %xmm7, %xmm0
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
+; CHECK-NOBMI-NEXT: orps %xmm5, %xmm0
+; CHECK-NOBMI-NEXT: xorps %xmm2, %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll
index 0713f0bbe244c..c84a1159ad56a 100644
--- a/llvm/test/CodeGen/X86/machine-cp.ll
+++ b/llvm/test/CodeGen/X86/machine-cp.ll
@@ -100,38 +100,55 @@ define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: xorps %xmm5, %xmm5
-; CHECK-NEXT: cvttps2dq %xmm3, %xmm6
+; CHECK-NEXT: cvttps2dq %xmm3, %xmm8
; CHECK-NEXT: movaps %xmm3, %xmm4
; CHECK-NEXT: cmpltps %xmm5, %xmm4
-; CHECK-NEXT: cvttps2dq %xmm2, %xmm3
+; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16]
+; CHECK-NEXT: movaps %xmm4, %xmm6
+; CHECK-NEXT: orps %xmm7, %xmm6
+; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3
+; CHECK-NEXT: andps %xmm7, %xmm3
+; CHECK-NEXT: andps %xmm6, %xmm3
+; CHECK-NEXT: andnps %xmm4, %xmm6
+; CHECK-NEXT: cvttps2dq %xmm2, %xmm4
; CHECK-NEXT: movaps %xmm2, %xmm7
; CHECK-NEXT: cmpltps %xmm5, %xmm7
-; CHECK-NEXT: cvttps2dq %xmm1, %xmm2
+; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12]
+; CHECK-NEXT: movaps %xmm7, %xmm9
+; CHECK-NEXT: orps %xmm8, %xmm9
+; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2
+; CHECK-NEXT: andps %xmm8, %xmm2
+; CHECK-NEXT: andps %xmm9, %xmm2
+; CHECK-NEXT: andnps %xmm7, %xmm9
+; CHECK-NEXT: cvttps2dq %xmm1, %xmm4
+; CHECK-NEXT: cmpltps %xmm5, %xmm1
+; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8]
; CHECK-NEXT: movaps %xmm1, %xmm8
-; CHECK-NEXT: cmpltps %xmm5, %xmm8
+; CHECK-NEXT: orps %xmm7, %xmm8
+; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4
+; CHECK-NEXT: andps %xmm7, %xmm4
+; CHECK-NEXT: andps %xmm8, %xmm4
+; CHECK-NEXT: andnps %xmm1, %xmm8
; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm0, %xmm9
-; CHECK-NEXT: cmpltps %xmm5, %xmm9
+; CHECK-NEXT: cmpltps %xmm5, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4]
-; CHECK-NEXT: orps %xmm5, %xmm9
-; CHECK-NEXT: movaps {{.*#+}} xmm10 = [5,6,7,8]
-; CHECK-NEXT: orps %xmm10, %xmm8
-; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
-; CHECK-NEXT: orps %xmm11, %xmm7
-; CHECK-NEXT: movaps {{.*#+}} xmm12 = [13,14,15,16]
-; CHECK-NEXT: orps %xmm12, %xmm4
-; CHECK-NEXT: cvtdq2ps %xmm1, %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm2, %xmm1
-; CHECK-NEXT: cvtdq2ps %xmm3, %xmm2
-; CHECK-NEXT: cvtdq2ps %xmm6, %xmm3
-; CHECK-NEXT: andps %xmm5, %xmm0
-; CHECK-NEXT: andps %xmm9, %xmm0
-; CHECK-NEXT: andps %xmm10, %xmm1
-; CHECK-NEXT: andps %xmm8, %xmm1
-; CHECK-NEXT: andps %xmm11, %xmm2
-; CHECK-NEXT: andps %xmm7, %xmm2
-; CHECK-NEXT: andps %xmm12, %xmm3
-; CHECK-NEXT: andps %xmm4, %xmm3
+; CHECK-NEXT: movaps %xmm0, %xmm7
+; CHECK-NEXT: orps %xmm5, %xmm7
+; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK-NEXT: andps %xmm5, %xmm1
+; CHECK-NEXT: andps %xmm7, %xmm1
+; CHECK-NEXT: andnps %xmm0, %xmm7
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-NEXT: andps %xmm0, %xmm7
+; CHECK-NEXT: orps %xmm7, %xmm1
+; CHECK-NEXT: andps %xmm0, %xmm8
+; CHECK-NEXT: orps %xmm8, %xmm4
+; CHECK-NEXT: andps %xmm0, %xmm9
+; CHECK-NEXT: orps %xmm9, %xmm2
+; CHECK-NEXT: andps %xmm0, %xmm6
+; CHECK-NEXT: orps %xmm6, %xmm3
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movaps %xmm4, %xmm1
; CHECK-NEXT: retq
bb:
%v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index aeb8fe93930a0..88934a382bbfa 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -8,36 +8,34 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE2-LABEL: PR45808:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
-; SSE2-NEXT: movdqa %xmm1, %xmm7
-; SSE2-NEXT: pxor %xmm4, %xmm7
-; SSE2-NEXT: movdqa %xmm7, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: movdqa %xmm2, %xmm8
; SSE2-NEXT: pxor %xmm4, %xmm8
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm9
; SSE2-NEXT: pcmpgtd %xmm8, %xmm9
; SSE2-NEXT: movdqa %xmm9, %xmm10
-; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2]
-; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
; SSE2-NEXT: andps %xmm10, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3]
-; SSE2-NEXT: pcmpeqd %xmm6, %xmm6
-; SSE2-NEXT: pxor %xmm9, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3]
-; SSE2-NEXT: pandn %xmm6, %xmm4
-; SSE2-NEXT: pxor %xmm6, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
-; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3]
+; SSE2-NEXT: orps %xmm4, %xmm9
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm9, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm7, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index d97e603c636af..f526db00df606 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%na = xor <2 x i64> %a, <i64 -1, i64 -1>
%nb = xor <2 x i64> %b, <i64 -1, i64 -1>
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 6e68b37bec98a..84856aab85079 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-SSE2-NEXT: pxor %xmm0, %xmm2
-; CHECK-SSE2-NEXT: pandn %xmm2, %xmm1
+; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; CHECK-SSE2-NEXT: retq
;
@@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-SSE41-NEXT: pxor %xmm0, %xmm2
-; CHECK-SSE41-NEXT: pandn %xmm2, %xmm1
+; CHECK-SSE41-NEXT: por %xmm1, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; CHECK-SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
index 25ba593d47062..63e08de7fdf53 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: ge_v2i64:
@@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: le_v2i64:
@@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: le_v2i64:
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index bd730e7dbefbc..9d65ff94061b0 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: ge_v2i64:
@@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: le_v2i64:
@@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: le_v2i64:
diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index 0fc298a2b4cd4..c1045c7b72f2c 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retl
%C = icmp sge <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retl
%C = icmp sle <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retl
%C = icmp uge <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
@@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retl
%C = icmp ule <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 048117dd43e66..4a3bcbb0a96a4 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -49,12 +49,13 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $16, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $32, %xmm1
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm3
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: psrlq $32, %xmm0
+; CHECK-NEXT: pandn %xmm3, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -151,12 +152,13 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $8, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrld $16, %xmm1
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm3
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: pandn %xmm3, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index ade6b5c8d6bdf..5a71878ea4579 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -448,14 +448,13 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT: pxor %xmm2, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64_splat:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index 990113b1ecc1e..716090abf1c4a 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -30,12 +30,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrlq $32, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -70,12 +71,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $32, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrlq $32, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -308,12 +310,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrlq $32, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -348,12 +351,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $32, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrlq $32, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -583,12 +587,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -625,12 +630,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $16, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrld $16, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -832,12 +838,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -874,12 +881,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $16, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrld $16, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1078,12 +1086,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrlw $8, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1114,12 +1123,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $8, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrlw $8, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1286,12 +1296,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $8, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrlw $8, %xmm0
+; SSE2-NEXT: pandn %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1322,12 +1333,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $8, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
-; SSE3-NEXT: pandn %xmm2, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
+; SSE3-NEXT: por %xmm1, %xmm0
+; SSE3-NEXT: psrlw $8, %xmm0
+; SSE3-NEXT: pandn %xmm3, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1493,28 +1505,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pandn %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $1, %xmm1
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: psubb %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrlw $1, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: psubb %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: psrlw $2, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddb %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddb %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8:
@@ -1526,28 +1539,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE3-NEXT: pand %xmm2, %xmm1
-; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
-; SSE3-NEXT: pxor %xmm1, %xmm3
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: pandn %xmm3, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $1, %xmm1
-; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: psubb %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: psrlw $1, %xmm2
+; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE3-NEXT: psubb %xmm2, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm3
-; SSE3-NEXT: pand %xmm1, %xmm3
+; SSE3-NEXT: pand %xmm2, %xmm3
; SSE3-NEXT: psrlw $2, %xmm0
-; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddb %xmm3, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: pand %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddb %xmm2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8:
@@ -1656,28 +1670,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pandn %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $1, %xmm1
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: psubb %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrlw $1, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: psubb %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: psrlw $2, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddb %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddb %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8u:
@@ -1689,28 +1704,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: movdqa %xmm0, %xmm3
+; SSE3-NEXT: pandn %xmm2, %xmm3
; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE3-NEXT: pand %xmm2, %xmm1
-; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
-; SSE3-NEXT: pxor %xmm1, %xmm3
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: pandn %xmm3, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $1, %xmm1
-; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: psubb %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: psrlw $1, %xmm2
+; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE3-NEXT: psubb %xmm2, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm3
-; SSE3-NEXT: pand %xmm1, %xmm3
+; SSE3-NEXT: pand %xmm2, %xmm3
; SSE3-NEXT: psrlw $2, %xmm0
-; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddb %xmm3, %xmm0
-; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: pand %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddb %xmm2, %xmm0
+; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8u:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index 8c24aa50a626e..8fe00afe0c0bb 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -26,19 +26,18 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm2, %zmm3
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -54,32 +53,31 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4
+; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT: vpsrlq $32, %zmm2, %zmm3
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm4
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6
-; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrlq $32, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpor %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpandn %ymm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
%out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
ret <8 x i64> %out
@@ -107,19 +105,18 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm2, %zmm3
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -135,32 +132,31 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1
-; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4
+; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT: vpsrlq $32, %zmm2, %zmm3
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0
+; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm4
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
-; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6
-; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrlq $32, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpor %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpandn %ymm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm1
+; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
%out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
ret <8 x i64> %out
@@ -186,19 +182,18 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrld $16, %zmm2, %zmm3
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
@@ -216,39 +211,38 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm3
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm5
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
-; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
-; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
-; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1
-; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpaddb %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5]
; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpandn %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
%out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
ret <16 x i32> %out
@@ -274,19 +268,18 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1)
-; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3
-; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1)
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrld $16, %zmm2, %zmm3
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3)
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1)
+; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0
+; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
@@ -304,39 +297,38 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1)
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm2
+; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm3
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1)
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5
+; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm5
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3
-; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7]
-; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
-; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5]
-; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
-; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1
-; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpaddb %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7
+; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5]
; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpandn %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7]
+; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5]
; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
%out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
ret <16 x i32> %out
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
index 1473da6aac5ea..555d033ac5ee4 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll
@@ -17,12 +17,13 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) {
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $8, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrld $16, %xmm1
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: pxor %xmm1, %xmm2
-; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm3
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: pandn %xmm3, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index d8e955c93581e..c1d30b6d5a995 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) {
; SSE-NEXT: pcmpgtd %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE-NEXT: pand %xmm4, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: ne_1_v2i64:
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index 55f2258aad018..97124f0a9d8d9 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v2i64:
@@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uge_v2i64:
@@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm2, %xmm0
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64:
@@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm1, %xmm2
-; SSE41-NEXT: pandn %xmm2, %xmm0
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ule_v2i64:
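
All of the SSE/AVX test churn above is one rewrite applied in either
direction: a chain of nots feeding an and-not (pcmpeqd/pxor/pandn,
computing ~a & ~b) traded against an or followed by a single not (por,
then pxor with all-ones, computing ~(a | b)). The two forms are
equivalent by De Morgan; a quick self-contained check of the identity
(a sketch for illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a : {0u, 1u, 0xF0F0F0F0u, 0xFFFFFFFFu})
        for (uint32_t b : {0u, 7u, 0x0F0F0F0Fu, 0xFFFFFFFFu})
          assert(~(a | b) == (~a & ~b)); // De Morgan: not-of-or == and-of-nots
      return 0;
    }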
>From c95c10a91b7c657f514282bd821efb99c5c549d2 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 18:48:51 +0000
Subject: [PATCH 18/20] [LoongArch][SystemZ]: Updated tests
---
 .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 141 +++++++++++-------
 llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll   |  20 ++-
 llvm/test/CodeGen/SystemZ/vec-eval.ll         |  49 +++---
3 files changed, 136 insertions(+), 74 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 27be02c50f1c7..4c5eab036dbb4 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -21,13 +21,15 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
; LA32R: # %bb.0:
; LA32R-NEXT: andi $a1, $a0, 254
; LA32R-NEXT: srli.w $a1, $a1, 1
+; LA32R-NEXT: nor $a2, $a0, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: andi $a1, $a0, 252
; LA32R-NEXT: srli.w $a1, $a1, 2
+; LA32R-NEXT: andn $a2, $a2, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: andi $a1, $a0, 240
-; LA32R-NEXT: srli.w $a1, $a1, 4
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: andi $a0, $a0, 240
+; LA32R-NEXT: srli.w $a0, $a0, 4
+; LA32R-NEXT: andn $a0, $a2, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: andi $a1, $a1, 85
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -60,23 +62,28 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
define i16 @test_ctlz_i16(i16 %a) nounwind {
; LA32R-LABEL: test_ctlz_i16:
; LA32R: # %bb.0:
+; LA32R-NEXT: srli.w $a1, $a0, 1
+; LA32R-NEXT: lu12i.w $a2, 7
+; LA32R-NEXT: ori $a2, $a2, 4095
+; LA32R-NEXT: and $a1, $a1, $a2
+; LA32R-NEXT: nor $a2, $a0, $zero
+; LA32R-NEXT: andn $a2, $a2, $a1
+; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: lu12i.w $a1, 15
-; LA32R-NEXT: ori $a2, $a1, 4094
-; LA32R-NEXT: and $a2, $a0, $a2
-; LA32R-NEXT: srli.w $a2, $a2, 1
-; LA32R-NEXT: or $a0, $a0, $a2
-; LA32R-NEXT: ori $a2, $a1, 4092
-; LA32R-NEXT: and $a2, $a0, $a2
-; LA32R-NEXT: srli.w $a2, $a2, 2
-; LA32R-NEXT: or $a0, $a0, $a2
-; LA32R-NEXT: ori $a2, $a1, 4080
-; LA32R-NEXT: and $a2, $a0, $a2
-; LA32R-NEXT: srli.w $a2, $a2, 4
-; LA32R-NEXT: or $a0, $a0, $a2
+; LA32R-NEXT: ori $a3, $a1, 4092
+; LA32R-NEXT: and $a3, $a0, $a3
+; LA32R-NEXT: srli.w $a3, $a3, 2
+; LA32R-NEXT: andn $a2, $a2, $a3
+; LA32R-NEXT: or $a0, $a0, $a3
+; LA32R-NEXT: ori $a3, $a1, 4080
+; LA32R-NEXT: and $a3, $a0, $a3
+; LA32R-NEXT: srli.w $a3, $a3, 4
+; LA32R-NEXT: andn $a2, $a2, $a3
+; LA32R-NEXT: or $a0, $a0, $a3
; LA32R-NEXT: ori $a1, $a1, 3840
-; LA32R-NEXT: and $a1, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a1, 8
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 8
+; LA32R-NEXT: andn $a0, $a2, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 5
; LA32R-NEXT: ori $a2, $a2, 1365
@@ -117,15 +124,19 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; LA32R-LABEL: test_ctlz_i32:
; LA32R: # %bb.0:
; LA32R-NEXT: srli.w $a1, $a0, 1
+; LA32R-NEXT: nor $a2, $a0, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 2
+; LA32R-NEXT: andn $a2, $a2, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 4
+; LA32R-NEXT: andn $a2, $a2, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 8
+; LA32R-NEXT: andn $a2, $a2, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a2, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 349525
; LA32R-NEXT: ori $a2, $a2, 1365
@@ -175,15 +186,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; LA32R-NEXT: bne $a1, $zero, .LBB3_2
; LA32R-NEXT: # %bb.1:
; LA32R-NEXT: srli.w $a1, $a0, 1
+; LA32R-NEXT: nor $a6, $a0, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 2
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 4
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 8
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a6, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: and $a1, $a1, $a5
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -201,15 +216,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; LA32R-NEXT: ret
; LA32R-NEXT: .LBB3_2:
; LA32R-NEXT: srli.w $a0, $a1, 1
+; LA32R-NEXT: nor $a6, $a1, $a0
; LA32R-NEXT: or $a0, $a1, $a0
; LA32R-NEXT: srli.w $a1, $a0, 2
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 4
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 8
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a6, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: and $a1, $a1, $a5
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -250,14 +269,17 @@ define i8 @test_not_ctlz_i8(i8 %a) nounwind {
; LA32R: # %bb.0:
; LA32R-NEXT: ori $a1, $zero, 254
; LA32R-NEXT: andn $a1, $a1, $a0
+; LA32R-NEXT: nor $a2, $a0, $zero
; LA32R-NEXT: srli.w $a1, $a1, 1
+; LA32R-NEXT: nor $a2, $a2, $a1
; LA32R-NEXT: orn $a0, $a1, $a0
; LA32R-NEXT: andi $a1, $a0, 252
; LA32R-NEXT: srli.w $a1, $a1, 2
+; LA32R-NEXT: andn $a2, $a2, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: andi $a1, $a0, 240
-; LA32R-NEXT: srli.w $a1, $a1, 4
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: andi $a0, $a0, 240
+; LA32R-NEXT: srli.w $a0, $a0, 4
+; LA32R-NEXT: andn $a0, $a2, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: andi $a1, $a1, 85
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -293,19 +315,22 @@ define i16 @test_not_ctlz_i16(i16 %a) nounwind {
; LA32R-NEXT: ori $a2, $a1, 4094
; LA32R-NEXT: andn $a2, $a2, $a0
; LA32R-NEXT: srli.w $a2, $a2, 1
+; LA32R-NEXT: andn $a3, $a0, $a2
; LA32R-NEXT: orn $a0, $a2, $a0
; LA32R-NEXT: ori $a2, $a1, 4092
; LA32R-NEXT: and $a2, $a0, $a2
; LA32R-NEXT: srli.w $a2, $a2, 2
+; LA32R-NEXT: andn $a3, $a3, $a2
; LA32R-NEXT: or $a0, $a0, $a2
; LA32R-NEXT: ori $a2, $a1, 4080
; LA32R-NEXT: and $a2, $a0, $a2
; LA32R-NEXT: srli.w $a2, $a2, 4
+; LA32R-NEXT: andn $a3, $a3, $a2
; LA32R-NEXT: or $a0, $a0, $a2
; LA32R-NEXT: ori $a1, $a1, 3840
-; LA32R-NEXT: and $a1, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a1, 8
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 8
+; LA32R-NEXT: andn $a0, $a3, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 5
; LA32R-NEXT: ori $a2, $a2, 1365
@@ -345,16 +370,20 @@ define i32 @test_not_ctlz_i32(i32 %a) nounwind {
; LA32R-LABEL: test_not_ctlz_i32:
; LA32R: # %bb.0:
; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: srli.w $a1, $a1, 1
-; LA32R-NEXT: orn $a0, $a1, $a0
-; LA32R-NEXT: srli.w $a1, $a0, 2
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 4
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 8
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a2, $a1, 1
+; LA32R-NEXT: nor $a1, $a1, $a2
+; LA32R-NEXT: orn $a0, $a2, $a0
+; LA32R-NEXT: srli.w $a2, $a0, 2
+; LA32R-NEXT: andn $a1, $a1, $a2
+; LA32R-NEXT: or $a0, $a0, $a2
+; LA32R-NEXT: srli.w $a2, $a0, 4
+; LA32R-NEXT: andn $a1, $a1, $a2
+; LA32R-NEXT: or $a0, $a0, $a2
+; LA32R-NEXT: srli.w $a2, $a0, 8
+; LA32R-NEXT: andn $a1, $a1, $a2
+; LA32R-NEXT: or $a0, $a0, $a2
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a1, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 349525
; LA32R-NEXT: ori $a2, $a2, 1365
@@ -406,16 +435,20 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind {
; LA32R-NEXT: bne $a6, $zero, .LBB7_2
; LA32R-NEXT: # %bb.1:
; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: srli.w $a1, $a1, 1
-; LA32R-NEXT: orn $a0, $a1, $a0
-; LA32R-NEXT: srli.w $a1, $a0, 2
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 4
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 8
-; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a6, $a1, 1
+; LA32R-NEXT: nor $a1, $a1, $a6
+; LA32R-NEXT: orn $a0, $a6, $a0
+; LA32R-NEXT: srli.w $a6, $a0, 2
+; LA32R-NEXT: andn $a1, $a1, $a6
+; LA32R-NEXT: or $a0, $a0, $a6
+; LA32R-NEXT: srli.w $a6, $a0, 4
+; LA32R-NEXT: andn $a1, $a1, $a6
+; LA32R-NEXT: or $a0, $a0, $a6
+; LA32R-NEXT: srli.w $a6, $a0, 8
+; LA32R-NEXT: andn $a1, $a1, $a6
+; LA32R-NEXT: or $a0, $a0, $a6
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a1, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: and $a1, $a1, $a5
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -433,15 +466,19 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind {
; LA32R-NEXT: ret
; LA32R-NEXT: .LBB7_2:
; LA32R-NEXT: srli.w $a0, $a6, 1
+; LA32R-NEXT: nor $a6, $a6, $a0
; LA32R-NEXT: orn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 2
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 4
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 8
+; LA32R-NEXT: andn $a6, $a6, $a1
; LA32R-NEXT: or $a0, $a0, $a1
-; LA32R-NEXT: srli.w $a1, $a0, 16
-; LA32R-NEXT: nor $a0, $a0, $a1
+; LA32R-NEXT: srli.w $a0, $a0, 16
+; LA32R-NEXT: andn $a0, $a6, $a0
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: and $a1, $a1, $a5
; LA32R-NEXT: sub.w $a0, $a0, $a1
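
The interleaved nor/andn pairs in the LA32R output above (and the
vno/vnc pairs in the SystemZ i128 output below) are the same De Morgan
fold applied inside the generic shift-and-or CTLZ expansion: alongside
the usual or-chain that smears the leading one bit rightward, the
negated smear is now accumulated in a second register (nor for the
first step, then andn rd, rj, rk = rj & ~rk for each later step), so
the final or + nor pair collapses into a single andn at the cost of
the extra parallel andn ops. For reference, the expansion being
lowered is roughly the following (a sketch of the standard algorithm,
not LLVM's literal code):

    #include <cstdint>

    // Shift-and-or CTLZ: smear the highest set bit right, invert, then
    // popcount the resulting leading-zero mask (SWAR popcount).
    uint32_t ctlz32(uint32_t x) {
      x |= x >> 1;
      x |= x >> 2;
      x |= x >> 4;
      x |= x >> 8;
      x |= x >> 16;
      x = ~x; // ones exactly where the leading zeros were
      x -= (x >> 1) & 0x55555555u;
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;
      return (x * 0x01010101u) >> 24;
    }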
diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
index 2c3bf944cdf89..9ff15f946d2d6 100644
--- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
+++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll
@@ -11,25 +11,31 @@ define i128 @f1(i128 %a) {
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vrepib %v1, 1
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vno %v2, %v0, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 2
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 4
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 8
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 16
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 32
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 64
-; CHECK-NEXT: vsrlb %v1, %v0, %v1
-; CHECK-NEXT: vno %v0, %v0, %v1
+; CHECK-NEXT: vsrlb %v0, %v0, %v1
+; CHECK-NEXT: vnc %v0, %v2, %v0
; CHECK-NEXT: vpopct %v0, %v0, 0
; CHECK-NEXT: vgbm %v1, 0
; CHECK-NEXT: vsumb %v0, %v0, %v1
@@ -47,25 +53,31 @@ define i128 @f2(i128 %a) {
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vrepib %v1, 1
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vno %v2, %v0, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 2
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 4
; CHECK-NEXT: vsrl %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 8
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 16
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 32
; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vnc %v2, %v2, %v1
; CHECK-NEXT: vo %v0, %v0, %v1
; CHECK-NEXT: vrepib %v1, 64
-; CHECK-NEXT: vsrlb %v1, %v0, %v1
-; CHECK-NEXT: vno %v0, %v0, %v1
+; CHECK-NEXT: vsrlb %v0, %v0, %v1
+; CHECK-NEXT: vnc %v0, %v2, %v0
; CHECK-NEXT: vpopct %v0, %v0, 0
; CHECK-NEXT: vgbm %v1, 0
; CHECK-NEXT: vsumb %v0, %v0, %v1
diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll
index bcdedcd3a407b..417fcb90af9a3 100644
--- a/llvm/test/CodeGen/SystemZ/vec-eval.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll
@@ -1889,7 +1889,9 @@ entry:
define <16 x i8> @eval128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 128
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: vno %v1, %v26, %v26
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 2
; CHECK-NEXT: br %r14
entry:
%and.demorgan = or <16 x i8> %src2, %src1
@@ -1901,9 +1903,10 @@ entry:
define <16 x i8> @eval129(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval129:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
; CHECK-NEXT: vn %v1, %v26, %v24
-; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 139
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v28
; CHECK-NEXT: br %r14
entry:
%and.demorgan = or <16 x i8> %src2, %src1
@@ -2034,8 +2037,10 @@ entry:
define <16 x i8> @eval138(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval138:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127
-; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 174
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
; CHECK-NEXT: br %r14
entry:
%not2 = xor <16 x i8> %src3, splat(i8 -1)
@@ -2050,9 +2055,10 @@ entry:
define <16 x i8> @eval139(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval139:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 11
-; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
; CHECK-NEXT: br %r14
entry:
%0 = or <16 x i8> %src2, %src1
@@ -2068,8 +2074,10 @@ entry:
define <16 x i8> @eval140(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval140:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 127
-; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 174
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v28, %v0, %v24, 40
+; CHECK-NEXT: vnc %v1, %v24, %v26
+; CHECK-NEXT: veval %v24, %v1, %v0, %v26, 47
; CHECK-NEXT: br %r14
entry:
%not1 = xor <16 x i8> %src2, splat(i8 -1)
@@ -2084,10 +2092,11 @@ entry:
define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval141:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
-; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47
-; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
; CHECK-NEXT: br %r14
entry:
%not1 = xor <16 x i8> %src2, splat(i8 -1)
@@ -2105,9 +2114,10 @@ entry:
define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval142:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127
-; CHECK-NEXT: vn %v1, %v28, %v26
-; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 14
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
; CHECK-NEXT: br %r14
entry:
%0 = or <16 x i8> %src2, %src1
@@ -2441,8 +2451,10 @@ entry:
define <16 x i8> @eval162(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval162:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: veval %v0, %v28, %v24, %v26, 127
-; CHECK-NEXT: veval %v24, %v26, %v28, %v0, 174
+; CHECK-NEXT: vgbm %v1, 65535
+; CHECK-NEXT: vno %v0, %v28, %v28
+; CHECK-NEXT: veval %v1, %v24, %v1, %v28, 40
+; CHECK-NEXT: vsel %v24, %v0, %v1, %v26
; CHECK-NEXT: br %r14
entry:
%not2 = xor <16 x i8> %src3, splat(i8 -1)
@@ -2457,9 +2469,10 @@ entry:
define <16 x i8> @eval163(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
; CHECK-LABEL: eval163:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 11
-; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
; CHECK-NEXT: br %r14
entry:
%0 = or <16 x i8> %src2, %src1
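
The veval immediates changing above (139 -> 47, 143 -> 47, and so on)
are 8-bit truth tables over the three vector operands, much like the
vpternlog immediates in the earlier AVX-512 diffs, so folding a not
into an operand simply re-indexes the table. As an illustration of how
such an immediate can be derived -- assuming the x86 vpternlog
convention, where bit (a<<2 | b<<1 | c) holds f(a,b,c); SystemZ's
veval may order the bits differently, so treat this as a sketch rather
than a decoder for either ISA:

    #include <cstdint>
    #include <cstdio>

    // Build a 3-input truth-table immediate: bit (a<<2 | b<<1 | c) of
    // the result holds f(a,b,c).
    template <typename F> uint8_t ternImm(F f) {
      uint8_t imm = 0;
      for (int i = 0; i < 8; ++i)
        if (f((i >> 2) & 1, (i >> 1) & 1, i & 1))
          imm |= uint8_t(1u << i);
      return imm;
    }

    int main() {
      // ~(a | b) & c, one of the not-of-or shapes this patch targets.
      unsigned imm = ternImm([](int a, int b, int c) { return !(a | b) && c; });
      printf("0x%02x\n", imm);
      return 0;
    }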
>From 3681932bdb9559e53e4f31f4a53cf7af43f361e9 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 19:05:28 +0000
Subject: [PATCH 19/20] [AArch64][PowerPC]: Reverting some updates
---
 llvm/test/CodeGen/AArch64/eon.ll              |  9 +++
 llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll |  1 -
 .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll    | 20 ++----
 llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll  | 65 -------------------
4 files changed, 14 insertions(+), 81 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll
index ea0e0122d9b6d..f939b4901be09 100644
--- a/llvm/test/CodeGen/AArch64/eon.ll
+++ b/llvm/test/CodeGen/AArch64/eon.ll
@@ -36,6 +36,10 @@ entry:
; Check that eon is generated if the xor is a disjoint or.
define i64 @disjoint_or(i64 %a, i64 %b) {
+; CHECK-LABEL: disjoint_or:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eon x0, x0, x1
+; CHECK-NEXT: ret
%or = or disjoint i64 %a, %b
%eon = xor i64 %or, -1
ret i64 %eon
@@ -43,6 +47,11 @@ define i64 @disjoint_or(i64 %a, i64 %b) {
; Check that eon is *not* generated if the or is not disjoint.
define i64 @normal_or(i64 %a, i64 %b) {
+; CHECK-LABEL: normal_or:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr x8, x0, x1
+; CHECK-NEXT: mvn x0, x8
+; CHECK-NEXT: ret
%or = or i64 %a, %b
%not = xor i64 %or, -1
ret i64 %not
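
The disjoint flag is what licenses the eon here: "or disjoint"
guarantees the operands share no set bits, so a | b equals a ^ b and
~(a | b) can be emitted as a single xor-not, while normal_or without
the flag keeps the orr/mvn pair. A tiny demonstration of the
underlying identity (a sketch, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t a = 0b0011, b = 0b0100;  // disjoint: a & b == 0
      assert((a & b) == 0);
      assert((a | b) == (a ^ b));       // or coincides with xor
      assert(~(a | b) == ~(a ^ b));     // so not-of-or can lower to eon
      return 0;
    }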
diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index ed8dc504f026a..bea24ee98336d 100644
--- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \
; RUN: grep eqv | count 3
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \
diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
index 310f0a66aa9b9..c23daac80279b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -1,39 +1,29 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; Check the miscellaneous logical vector operations added in P8
-;
+;
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; Test x eqv y
define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_veqv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: veqv 2, 2, 3
-; CHECK-NEXT: blr
%tmp = xor <4 x i32> %x, %y
%ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %ret_val
+; CHECK: veqv 2, 2, 3
}
; Test x vnand y
define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_vnand:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vnand 2, 2, 3
-; CHECK-NEXT: blr
%tmp = and <4 x i32> %x, %y
%ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %ret_val
+; CHECK: vnand 2, 2, 3
}
; Test x vorc y and variants
define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: test_vorc:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vorc 3, 2, 3
-; CHECK-NEXT: vorc 2, 2, 3
-; CHECK-NEXT: blr
%tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = or <4 x i32> %x, %tmp1
+; CHECK: vorc 3, 2, 3
%tmp3 = xor <4 x i32> %tmp2, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp4 = or <4 x i32> %tmp3, %x
+; CHECK: vorc 2, 2, 3
ret <4 x i32> %tmp4
}
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
index 7f7a52fe7de65..ba74df956e71e 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -7,10 +6,6 @@
; CHECK: xxlandc v2, v2, v3
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_not:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlandc v2, v2, v3
-; CHECK-NEXT: blr
entry:
%neg = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
%and = and <4 x i32> %neg, %A
@@ -22,10 +17,6 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT: blr
entry:
%and = and <16 x i8> %B, %A
%and1 = and <16 x i8> %and, %C
@@ -37,10 +28,6 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT: blr
entry:
%and = and <8 x i16> %B, %A
%and1 = and <8 x i16> %and, %C
@@ -52,10 +39,6 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %A
%and1 = and <4 x i32> %and, %C
@@ -67,10 +50,6 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 1
; CHECK-NEXT: blr
define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_and64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v3, v2, v4, 1
-; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %A
%and1 = and <2 x i64> %and, %C
@@ -82,10 +61,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 14
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_nand:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 14
-; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %C, %B
%neg = xor <4 x i32> %and, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -98,10 +73,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 7
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_or:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 7
-; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%and = and <4 x i32> %or, %A
@@ -113,10 +84,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 8
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_nor:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 8
-; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%neg = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -129,10 +96,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 6
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_xor:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 6
-; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%and = and <4 x i32> %xor, %A
@@ -144,10 +107,6 @@ entry:
; CHECK: xxeval v2, v2, v3, v4, 9
; CHECK-NEXT: blr
define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: and_eqv:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 9
-; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg = xor <4 x i32> %xor, %C
@@ -160,10 +119,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 241
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_nand:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 241
-; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %C, %B
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -176,10 +131,6 @@ entry:
; CHECK: xxeval v2, v3, v2, v4, 254
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_and:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v3, v2, v4, 254
-; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %A
%and1 = and <4 x i32> %and, %C
@@ -192,10 +143,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 249
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_xor:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 249
-; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%and = and <4 x i32> %xor, %A
@@ -208,10 +155,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 246
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_eqv:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 246
-; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %C, %B
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -224,10 +167,6 @@ entry:
; CHECK: xxeval v2, v2, v4, v3, 248
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_or:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v4, v3, 248
-; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %C, %B
%and = and <4 x i32> %or, %A
@@ -240,10 +179,6 @@ entry:
; CHECK: xxeval v2, v2, v3, v4, 247
; CHECK-NEXT: blr
define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 {
-; CHECK-LABEL: nand_nor:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 247
-; CHECK-NEXT: blr
entry:
%A.not = xor <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
%or = or <4 x i32> %A.not, %B
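
A pattern worth noting in the immediates above: each nand_* constant is the
bitwise complement of the corresponding and_* constant (1/254, 7/248, 8/247,
6/249, 14/241, 9/246), consistent with xxeval's final operand being an 8-bit
truth table over the three source bits. A small sketch of that reading; the
exact bit ordering is inferred from the constants in this file, not quoted
from the Power ISA, and the checked examples are symmetric in the last two
operands so their order does not matter:

    #include <cassert>
    #include <cstdint>

    // Each result bit takes IMM's truth-table entry for the three source
    // bits (a, b, c) at that position; the combination a=b=c=1 selecting
    // the least significant IMM bit is an assumption matching these tests.
    static uint32_t xxevalBit(unsigned imm, unsigned a, unsigned b,
                              unsigned c) {
      return (imm >> (7 - (a * 4 + b * 2 + c))) & 1;
    }

    int main() {
      for (unsigned a = 0; a < 2; ++a)
        for (unsigned b = 0; b < 2; ++b)
          for (unsigned c = 0; c < 2; ++c) {
            assert(xxevalBit(1, a, b, c) == (a & b & c));         // and_and
            assert(xxevalBit(7, a, b, c) == (a & (b | c)));       // and_or
            assert(xxevalBit(254, a, b, c) == ((a & b & c) ^ 1)); // nand_and
          }
      return 0;
    }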
>From b3ec648bec97aa472e73a7cb4bcc93c8bbbfbf5d Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 22 Oct 2025 19:06:41 +0000
Subject: [PATCH 20/20] [X86]: Removed comment
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 7 -------
1 file changed, 7 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e870514db2443..16f6d31728717 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55615,13 +55615,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
// Folds for better commutativity:
if (N1->hasOneUse()) {
- /*
- // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
- if (SDValue Not = IsNOT(N1, DAG))
- return DAG.getNOT(
- DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
- */
-
// ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x))
// Zero out elements by setting the PSHUFB mask value to 0xFF.
if (DAG.ComputeNumSignBits(N0) == EltSizeInBits) {
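
The deleted comment was the ANDNP form of De Morgan's law: ANDNP inverts its
first operand, so ANDNP(x, NOT(y)) = AND(NOT(x), NOT(y)) = NOT(OR(x, y)). A
scalar sanity check of the identity, illustrative only:

    #include <cassert>
    #include <cstdint>

    // x86 ANDNP/ANDN complement the first source: ~x & y.
    static uint32_t andnp(uint32_t x, uint32_t y) { return ~x & y; }

    int main() {
      uint32_t x = 0xDEADBEEFu, y = 0x12345678u;
      // ANDNP(x, NOT(y)) == NOT(OR(x, y)), per De Morgan.
      assert(andnp(x, ~y) == ~(x | y));
      return 0;
    }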