[llvm] [X86] Consistently use 'k' for predicate mask registers in instruction names (PR #108780)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 15:22:17 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/108780
We use 'k' for move instructions and to indicate masked variants of EVEX instructions, but otherwise we're very inconsistent about when we use 'k' vs 'r'.
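For reference, the renames below follow the backend's usual operand-suffix convention, where each trailing letter in an instruction name describes one operand ('r' = register, 'k' = mask register, 'i' = immediate, 'm' = memory); a few representative mappings from the patch:

  KANDWrr        -> KANDWkk          (both operands are mask registers)
  KSHIFTRWri     -> KSHIFTRWki       (mask register source, immediate shift amount)
  VPMOVW2MZ128rr -> VPMOVW2MZ128kr   (mask destination, vector register source)
  VPMOVM2DZrr    -> VPMOVM2DZrk      (vector register destination, mask source)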
From 0ae3fbecade6e89975075c689c16213c07466010 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Sun, 15 Sep 2024 23:21:08 +0100
Subject: [PATCH] [X86] Consistently use 'k' for predicate mask registers in
instruction names
We use 'k' for move instructions and to indicate masked variants of EVEX instructions, but otherwise we're very inconsistent about when we use 'k' vs 'r'.
---
llvm/lib/Target/X86/X86DomainReassignment.cpp | 124 +++++++++---------
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 26 ++--
llvm/lib/Target/X86/X86InstrAVX512.td | 46 +++----
llvm/lib/Target/X86/X86InstrInfo.cpp | 46 +++----
llvm/lib/Target/X86/X86InstrVecCompiler.td | 30 ++---
llvm/lib/Target/X86/X86SchedIceLake.td | 32 ++---
llvm/lib/Target/X86/X86SchedSapphireRapids.td | 26 ++--
llvm/lib/Target/X86/X86SchedSkylakeServer.td | 32 ++---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 20 +--
.../CodeGen/X86/apx/domain-reassignment.mir | 78 +++++------
llvm/test/CodeGen/X86/domain-reassignment.mir | 78 +++++------
.../CodeGen/X86/masked_compressstore_isel.ll | 8 +-
.../CodeGen/X86/masked_expandload_isel.ll | 8 +-
13 files changed, 277 insertions(+), 277 deletions(-)
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 831944cce3afdd..4823183113989a 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -636,21 +636,21 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
- createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
- createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
- createReplacer(X86::NOT16r, X86::KNOTWrr);
- createReplacer(X86::OR16rr, X86::KORWrr);
- createReplacer(X86::AND16rr, X86::KANDWrr);
- createReplacer(X86::XOR16rr, X86::KXORWrr);
+ createReplacer(X86::SHR16ri, X86::KSHIFTRWki);
+ createReplacer(X86::SHL16ri, X86::KSHIFTLWki);
+ createReplacer(X86::NOT16r, X86::KNOTWkk);
+ createReplacer(X86::OR16rr, X86::KORWkk);
+ createReplacer(X86::AND16rr, X86::KANDWkk);
+ createReplacer(X86::XOR16rr, X86::KXORWkk);
bool HasNDD = STI->hasNDD();
if (HasNDD) {
- createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWri);
- createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWri);
- createReplacer(X86::NOT16r_ND, X86::KNOTWrr);
- createReplacer(X86::OR16rr_ND, X86::KORWrr);
- createReplacer(X86::AND16rr_ND, X86::KANDWrr);
- createReplacer(X86::XOR16rr_ND, X86::KXORWrr);
+ createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWki);
+ createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWki);
+ createReplacer(X86::NOT16r_ND, X86::KNOTWkk);
+ createReplacer(X86::OR16rr_ND, X86::KORWkk);
+ createReplacer(X86::AND16rr_ND, X86::KANDWkk);
+ createReplacer(X86::XOR16rr_ND, X86::KXORWkk);
}
if (STI->hasBWI()) {
@@ -663,86 +663,86 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));
- createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
- createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
+ createReplacer(X86::SHR32ri, X86::KSHIFTRDki);
+ createReplacer(X86::SHR64ri, X86::KSHIFTRQki);
- createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
- createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
+ createReplacer(X86::SHL32ri, X86::KSHIFTLDki);
+ createReplacer(X86::SHL64ri, X86::KSHIFTLQki);
- createReplacer(X86::ADD32rr, X86::KADDDrr);
- createReplacer(X86::ADD64rr, X86::KADDQrr);
+ createReplacer(X86::ADD32rr, X86::KADDDkk);
+ createReplacer(X86::ADD64rr, X86::KADDQkk);
- createReplacer(X86::NOT32r, X86::KNOTDrr);
- createReplacer(X86::NOT64r, X86::KNOTQrr);
+ createReplacer(X86::NOT32r, X86::KNOTDkk);
+ createReplacer(X86::NOT64r, X86::KNOTQkk);
- createReplacer(X86::OR32rr, X86::KORDrr);
- createReplacer(X86::OR64rr, X86::KORQrr);
+ createReplacer(X86::OR32rr, X86::KORDkk);
+ createReplacer(X86::OR64rr, X86::KORQkk);
- createReplacer(X86::AND32rr, X86::KANDDrr);
- createReplacer(X86::AND64rr, X86::KANDQrr);
+ createReplacer(X86::AND32rr, X86::KANDDkk);
+ createReplacer(X86::AND64rr, X86::KANDQkk);
- createReplacer(X86::ANDN32rr, X86::KANDNDrr);
- createReplacer(X86::ANDN64rr, X86::KANDNQrr);
+ createReplacer(X86::ANDN32rr, X86::KANDNDkk);
+ createReplacer(X86::ANDN64rr, X86::KANDNQkk);
- createReplacer(X86::XOR32rr, X86::KXORDrr);
- createReplacer(X86::XOR64rr, X86::KXORQrr);
+ createReplacer(X86::XOR32rr, X86::KXORDkk);
+ createReplacer(X86::XOR64rr, X86::KXORQkk);
if (HasNDD) {
- createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDri);
- createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDri);
- createReplacer(X86::ADD32rr_ND, X86::KADDDrr);
- createReplacer(X86::NOT32r_ND, X86::KNOTDrr);
- createReplacer(X86::OR32rr_ND, X86::KORDrr);
- createReplacer(X86::AND32rr_ND, X86::KANDDrr);
- createReplacer(X86::XOR32rr_ND, X86::KXORDrr);
- createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQri);
- createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQri);
- createReplacer(X86::ADD64rr_ND, X86::KADDQrr);
- createReplacer(X86::NOT64r_ND, X86::KNOTQrr);
- createReplacer(X86::OR64rr_ND, X86::KORQrr);
- createReplacer(X86::AND64rr_ND, X86::KANDQrr);
- createReplacer(X86::XOR64rr_ND, X86::KXORQrr);
+ createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDki);
+ createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDki);
+ createReplacer(X86::ADD32rr_ND, X86::KADDDkk);
+ createReplacer(X86::NOT32r_ND, X86::KNOTDkk);
+ createReplacer(X86::OR32rr_ND, X86::KORDkk);
+ createReplacer(X86::AND32rr_ND, X86::KANDDkk);
+ createReplacer(X86::XOR32rr_ND, X86::KXORDkk);
+ createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQki);
+ createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQki);
+ createReplacer(X86::ADD64rr_ND, X86::KADDQkk);
+ createReplacer(X86::NOT64r_ND, X86::KNOTQkk);
+ createReplacer(X86::OR64rr_ND, X86::KORQkk);
+ createReplacer(X86::AND64rr_ND, X86::KANDQkk);
+ createReplacer(X86::XOR64rr_ND, X86::KXORQkk);
}
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
- // createReplacer(X86::TEST32rr, X86::KTESTDrr);
- // createReplacer(X86::TEST64rr, X86::KTESTQrr);
+ // createReplacer(X86::TEST32rr, X86::KTESTDkk);
+ // createReplacer(X86::TEST64rr, X86::KTESTQkk);
}
if (STI->hasDQI()) {
- createReplacer(X86::ADD8rr, X86::KADDBrr);
- createReplacer(X86::ADD16rr, X86::KADDWrr);
+ createReplacer(X86::ADD8rr, X86::KADDBkk);
+ createReplacer(X86::ADD16rr, X86::KADDWkk);
- createReplacer(X86::AND8rr, X86::KANDBrr);
+ createReplacer(X86::AND8rr, X86::KANDBkk);
createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
- createReplacer(X86::NOT8r, X86::KNOTBrr);
+ createReplacer(X86::NOT8r, X86::KNOTBkk);
- createReplacer(X86::OR8rr, X86::KORBrr);
+ createReplacer(X86::OR8rr, X86::KORBkk);
- createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
- createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
+ createReplacer(X86::SHR8ri, X86::KSHIFTRBki);
+ createReplacer(X86::SHL8ri, X86::KSHIFTLBki);
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
- // createReplacer(X86::TEST8rr, X86::KTESTBrr);
- // createReplacer(X86::TEST16rr, X86::KTESTWrr);
+ // createReplacer(X86::TEST8rr, X86::KTESTBkk);
+ // createReplacer(X86::TEST16rr, X86::KTESTWkk);
- createReplacer(X86::XOR8rr, X86::KXORBrr);
+ createReplacer(X86::XOR8rr, X86::KXORBkk);
if (HasNDD) {
- createReplacer(X86::ADD8rr_ND, X86::KADDBrr);
- createReplacer(X86::ADD16rr_ND, X86::KADDWrr);
- createReplacer(X86::AND8rr_ND, X86::KANDBrr);
- createReplacer(X86::NOT8r_ND, X86::KNOTBrr);
- createReplacer(X86::OR8rr_ND, X86::KORBrr);
- createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBri);
- createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBri);
- createReplacer(X86::XOR8rr_ND, X86::KXORBrr);
+ createReplacer(X86::ADD8rr_ND, X86::KADDBkk);
+ createReplacer(X86::ADD16rr_ND, X86::KADDWkk);
+ createReplacer(X86::AND8rr_ND, X86::KANDBkk);
+ createReplacer(X86::NOT8r_ND, X86::KNOTBkk);
+ createReplacer(X86::OR8rr_ND, X86::KORBkk);
+ createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBki);
+ createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBki);
+ createReplacer(X86::XOR8rr_ND, X86::KXORBkk);
}
}
#undef GET_EGPR_IF_ENABLED
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d0a54ab8993c26..e8a9da1f17d962 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1647,10 +1647,10 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
// used. We're doing this late so we can prefer to fold the AND into masked
// comparisons. Doing that can be better for the live range of the mask
// register.
- case X86::KORTESTBrr:
- case X86::KORTESTWrr:
- case X86::KORTESTDrr:
- case X86::KORTESTQrr: {
+ case X86::KORTESTBkk:
+ case X86::KORTESTWkk:
+ case X86::KORTESTDkk:
+ case X86::KORTESTQkk: {
SDValue Op0 = N->getOperand(0);
if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
!Op0.isMachineOpcode() || !onlyUsesZeroFlag(SDValue(N, 0)))
@@ -1661,10 +1661,10 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
switch (Op0.getMachineOpcode()) {
default:
continue;
- CASE(KANDBrr)
- CASE(KANDWrr)
- CASE(KANDDrr)
- CASE(KANDQrr)
+ CASE(KANDBkk)
+ CASE(KANDWkk)
+ CASE(KANDDkk)
+ CASE(KANDQkk)
}
unsigned NewOpc;
#define FROM_TO(A, B) \
@@ -1672,14 +1672,14 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
NewOpc = X86::B; \
break;
switch (Opc) {
- FROM_TO(KORTESTBrr, KTESTBrr)
- FROM_TO(KORTESTWrr, KTESTWrr)
- FROM_TO(KORTESTDrr, KTESTDrr)
- FROM_TO(KORTESTQrr, KTESTQrr)
+ FROM_TO(KORTESTBkk, KTESTBkk)
+ FROM_TO(KORTESTWkk, KTESTWkk)
+ FROM_TO(KORTESTDkk, KTESTDkk)
+ FROM_TO(KORTESTQkk, KTESTQkk)
}
// KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
// KAND instructions and KTEST use the same ISA feature.
- if (NewOpc == X86::KTESTWrr && !Subtarget->hasDQI())
+ if (NewOpc == X86::KTESTWkk && !Subtarget->hasDQI())
continue;
#undef FROM_TO
MachineSDNode *KTest = CurDAG->getMachineNode(
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b6bf34a8a0d31c..9ed59803c1f9d9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2782,7 +2782,7 @@ multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
X86FoldableSchedWrite sched, Predicate prd> {
let Predicates = [prd] in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+ def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>,
Sched<[sched]>;
@@ -2807,14 +2807,14 @@ defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
def : Pat<(vnot VK4:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -2823,7 +2823,7 @@ multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, Predicate prd,
bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
+ def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
@@ -2877,11 +2877,11 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode,
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
-defm : avx512_binop_pat<and, KANDWrr>;
-defm : avx512_binop_pat<vandn, KANDNWrr>;
-defm : avx512_binop_pat<or, KORWrr>;
-defm : avx512_binop_pat<vxnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, KXORWrr>;
+defm : avx512_binop_pat<and, KANDWkk>;
+defm : avx512_binop_pat<vandn, KANDNWkk>;
+defm : avx512_binop_pat<or, KORWkk>;
+defm : avx512_binop_pat<vxnor, KXNORWkk>;
+defm : avx512_binop_pat<xor, KXORWkk>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
@@ -2889,13 +2889,13 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
- def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
+ def kk : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
(ins Src.KRC:$src1, Src.KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX, VVVV, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
- (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
+ (!cast<Instruction>(NAME#kk) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
@@ -2908,7 +2908,7 @@ multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched,
Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
- def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
+ def kk : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
Sched<[sched]>;
@@ -2935,7 +2935,7 @@ defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512] in
- def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
+ def ki : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
@@ -3463,12 +3463,12 @@ def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
(v8i64 VR512:$src))),
- (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
- (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
+ (VMOVDQA32Zrrkz (KNOTWkk VK16WM:$mask), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
@@ -10425,7 +10425,7 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
-def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+def rk : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[Sched]>;
@@ -10448,7 +10448,7 @@ defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
- def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
+ def kr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
EVEX, Sched<[WriteMove]>;
@@ -10461,7 +10461,7 @@ multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
(_.KVT (COPY_TO_REGCLASS
- (!cast<Instruction>(Name#"Zrr")
+ (!cast<Instruction>(Name#"Zkr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
@@ -10499,14 +10499,14 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
- (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ (VPMOVDBZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
- (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ (VPMOVDWZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
}
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
- (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rk VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a74da000af0cee..57a894b09e0445 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6323,17 +6323,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
case X86::KSET0W:
- return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORWkk), X86::K0);
case X86::KSET0D:
- return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORDkk), X86::K0);
case X86::KSET0Q:
- return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORQkk), X86::K0);
case X86::KSET1W:
- return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORWkk), X86::K0);
case X86::KSET1D:
- return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORDkk), X86::K0);
case X86::KSET1Q:
- return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORQkk), X86::K0);
case TargetOpcode::LOAD_STACK_GUARD:
expandLoadStackGuard(MIB, *this);
return true;
@@ -9805,22 +9805,22 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
case X86::VXORPSZ256rr:
case X86::VXORPDZrr:
case X86::VXORPSZrr:
- case X86::KADDBrr:
- case X86::KADDWrr:
- case X86::KADDDrr:
- case X86::KADDQrr:
- case X86::KANDBrr:
- case X86::KANDWrr:
- case X86::KANDDrr:
- case X86::KANDQrr:
- case X86::KORBrr:
- case X86::KORWrr:
- case X86::KORDrr:
- case X86::KORQrr:
- case X86::KXORBrr:
- case X86::KXORWrr:
- case X86::KXORDrr:
- case X86::KXORQrr:
+ case X86::KADDBkk:
+ case X86::KADDWkk:
+ case X86::KADDDkk:
+ case X86::KADDQkk:
+ case X86::KANDBkk:
+ case X86::KANDWkk:
+ case X86::KANDDkk:
+ case X86::KANDQkk:
+ case X86::KORBkk:
+ case X86::KORWkk:
+ case X86::KORDkk:
+ case X86::KORQkk:
+ case X86::KXORBkk:
+ case X86::KXORWkk:
+ case X86::KXORDkk:
+ case X86::KXORQkk:
case X86::VPADDBrr:
case X86::VPADDWrr:
case X86::VPADDDrr:
@@ -10729,7 +10729,7 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
return;
// KXOR is safe to use because it doesn't affect flags.
- unsigned Op = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+ unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
BuildMI(MBB, Iter, DL, get(Op), Reg)
.addReg(Reg, RegState::Undef)
.addReg(Reg, RegState::Undef);
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 461b2badc13134..122627ca45d31f 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -280,24 +280,24 @@ let Predicates = [HasBWI, HasVLX] in {
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
- (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
+ (KSHIFTRWki (KSHIFTLWki (COPY_TO_REGCLASS VK1:$mask, VK16),
(i8 15)), (i8 15))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
- (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
+ (KSHIFTRWki (KSHIFTLWki (COPY_TO_REGCLASS VK2:$mask, VK16),
(i8 14)), (i8 14))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
- (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
+ (KSHIFTRWki (KSHIFTLWki (COPY_TO_REGCLASS VK4:$mask, VK16),
(i8 12)), (i8 12))>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
- (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (KSHIFTRWki (KSHIFTLWki (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
@@ -308,15 +308,15 @@ let Predicates = [HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
- (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
+ (KSHIFTRBki (KSHIFTLBki (COPY_TO_REGCLASS VK1:$mask, VK8),
(i8 7)), (i8 7))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
- (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
+ (KSHIFTRBki (KSHIFTLBki (COPY_TO_REGCLASS VK2:$mask, VK8),
(i8 6)), (i8 6))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
- (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
+ (KSHIFTRBki (KSHIFTLBki (COPY_TO_REGCLASS VK4:$mask, VK8),
(i8 4)), (i8 4))>;
}
@@ -336,12 +336,12 @@ let Predicates = [HasBWI] in {
let Predicates = [HasBWI, NoDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
- (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
+ (KSHIFTRDki (KSHIFTLDki (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
+ (KSHIFTRQki (KSHIFTLQki (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
}
@@ -358,28 +358,28 @@ let Predicates = [HasBWI, HasDQI] in {
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
- (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
+ (KSHIFTRDki (KSHIFTLDki (COPY_TO_REGCLASS VK1:$mask, VK32),
(i8 31)), (i8 31))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
- (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
+ (KSHIFTRDki (KSHIFTLDki (COPY_TO_REGCLASS VK2:$mask, VK32),
(i8 30)), (i8 30))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
- (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
+ (KSHIFTRDki (KSHIFTLDki (COPY_TO_REGCLASS VK4:$mask, VK32),
(i8 28)), (i8 28))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
+ (KSHIFTRQki (KSHIFTLQki (COPY_TO_REGCLASS VK1:$mask, VK64),
(i8 63)), (i8 63))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
+ (KSHIFTRQki (KSHIFTLQki (COPY_TO_REGCLASS VK2:$mask, VK64),
(i8 62)), (i8 62))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
+ (KSHIFTRQki (KSHIFTLQki (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index b32db53ff08cde..72fbcc5598108f 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -633,13 +633,13 @@ def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
-def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
- "KANDN(B|D|Q|W)rr",
+def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)kk",
+ "KANDN(B|D|Q|W)kk",
"KMOV(B|D|Q|W)kk",
- "KNOT(B|D|Q|W)rr",
- "KOR(B|D|Q|W)rr",
- "KXNOR(B|D|Q|W)rr",
- "KXOR(B|D|Q|W)rr",
+ "KNOT(B|D|Q|W)kk",
+ "KOR(B|D|Q|W)kk",
+ "KXNOR(B|D|Q|W)kk",
+ "KXOR(B|D|Q|W)kk",
"KSET0(B|D|Q|W)", // Same as KXOR
"KSET1(B|D|Q|W)", // Same as KXNOR
"MMX_PADDS(B|W)rr",
@@ -651,10 +651,10 @@ def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"MMX_P(MAX|MIN)UBrr",
"MMX_PSUBS(B|W)rr",
"MMX_PSUBUS(B|W)rr",
- "VPMOVB2M(Z|Z128|Z256)rr",
- "VPMOVD2M(Z|Z128|Z256)rr",
- "VPMOVQ2M(Z|Z128|Z256)rr",
- "VPMOVW2M(Z|Z128|Z256)rr")>;
+ "VPMOVB2M(Z|Z128|Z256)kr",
+ "VPMOVD2M(Z|Z128|Z256)kr",
+ "VPMOVQ2M(Z|Z128|Z256)kr",
+ "VPMOVW2M(Z|Z128|Z256)kr")>;
def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
let Latency = 1;
@@ -826,8 +826,8 @@ def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
let ReleaseAtCycles = [1];
}
def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
- "KORTEST(B|D|Q|W)rr",
- "KTEST(B|D|Q|W)rr")>;
+ "KORTEST(B|D|Q|W)kk",
+ "KTEST(B|D|Q|W)kk")>;
def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
let Latency = 3;
@@ -854,10 +854,10 @@ def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
-def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
- "KSHIFTL(B|D|Q|W)ri",
- "KSHIFTR(B|D|Q|W)ri",
- "KUNPCK(BW|DQ|WD)rr",
+def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)kk",
+ "KSHIFTL(B|D|Q|W)ki",
+ "KSHIFTR(B|D|Q|W)ki",
+ "KUNPCK(BW|DQ|WD)kk",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
"VCMP(SD|SS)Zrr",
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 1fb3c7560a5724..9818f4c01ea678 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -1318,17 +1318,17 @@ def : InstRW<[SPRWriteResGroup90], (instrs VZEROUPPER)>;
def SPRWriteResGroup91 : SchedWriteRes<[SPRPort05]> {
let Latency = 4;
}
-def : InstRW<[SPRWriteResGroup91], (instregex "^KADD(B|D|Q|W)rr$",
- "^KSHIFT(LB|RD|RQ|RW)ri$",
- "^KSHIFT(LD|RB)ri$",
- "^KSHIFTL(Q|W)ri$",
- "^KUNPCK(BW|DQ|WD)rr$")>;
+def : InstRW<[SPRWriteResGroup91], (instregex "^KADD(B|D|Q|W)kk",
+ "^KSHIFT(LB|RD|RQ|RW)ki$",
+ "^KSHIFT(LD|RB)ki$",
+ "^KSHIFTL(Q|W)ki$",
+ "^KUNPCK(BW|DQ|WD)kk$")>;
def SPRWriteResGroup92 : SchedWriteRes<[SPRPort00]>;
-def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)rr$",
+def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)kk$",
"^KMOV(B|D|Q|W)kk$",
- "^KNOT(B|D|Q|W)rr$",
- "^K((X|XN)?)OR(B|D|Q|W)rr$",
+ "^KNOT(B|D|Q|W)kk$",
+ "^K((X|XN)?)OR(B|D|Q|W)kk$",
"^VP(A|SU)BSBZrr$",
"^VPABS(D|Q|W)Zrr$",
"^VPABS(D|Q)Zrrk(z?)$",
@@ -1342,7 +1342,7 @@ def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)rr$",
"^VPSH(L|R)DV(D|Q|W)Zr$",
"^VPSH(L|R)DV(D|Q)Zrk(z?)$",
"^VPSUB(U?)SWZrr$")>;
-def : InstRW<[SPRWriteResGroup92], (instrs KANDNBrr,
+def : InstRW<[SPRWriteResGroup92], (instrs KANDNBkk,
VPSUBUSBZrr)>;
def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
@@ -1378,7 +1378,7 @@ def : InstRW<[SPRWriteResGroup95], (instrs KMOVQkr,
def SPRWriteResGroup96 : SchedWriteRes<[SPRPort00]> {
let Latency = 3;
}
-def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)rr$",
+def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)kk$",
"^VP(A|SU)BS(B|W)Zrrk(z?)$",
"^VPADD(U?)S(B|W)Zrrk(z?)$",
"^VPAVG(B|W)Zrrk(z?)$",
@@ -1867,7 +1867,7 @@ def : InstRW<[SPRWriteResGroup160], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$",
"^VP(ADD|SUB)(B|D|Q|W)Yrr$",
"^VP(ADD|SUB)(B|D|Q|W)Z(128|256)rr$",
"^VP(ADD|SUB)(D|Q)Z(128|256)rrk(z?)$",
- "^VPMOVM2(D|Q)Z128rr$",
+ "^VPMOVM2(D|Q)Z128rk$",
"^VPTERNLOG(D|Q)Z(128|256)rri((k|kz)?)$")>;
def : InstRW<[SPRWriteResGroup160], (instrs VPBLENDDrri)>;
@@ -2534,7 +2534,7 @@ def : InstRW<[SPRWriteResGroup250], (instregex "^V(ADD|SUB)P(D|S)Zrr(b?)$",
"^VMOVDQU(8|16)Zrrk(z?)((_REV)?)$",
"^VP(ADD|SUB)(B|W)Zrrk(z?)$",
"^VPBLENDM(B|W)Zrrk(z?)$",
- "^VPMOVM2(B|W)Zrr$")>;
+ "^VPMOVM2(B|W)Zrk$")>;
def SPRWriteResGroup251 : SchedWriteRes<[SPRPort00_01]> {
let Latency = 6;
@@ -4001,7 +4001,7 @@ def : InstRW<[SPRWriteResGroup418], (instregex "^VMOVDQU(8|16)Z(128|256)rrk(z?)(
"^VMOVSHZrrk(z?)((_REV)?)$",
"^VP(ADD|SUB)(B|W)Z(128|256)rrk(z?)$",
"^VPBLENDM(B|W)Z(128|256)rrk(z?)$",
- "^VPMOVM2(B|W)Z(128|256)rr$")>;
+ "^VPMOVM2(B|W)Z(128|256)rk$")>;
def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
let ReleaseAtCycles = [1, 2, 2];
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 649d38de185a80..26e290a2250c9f 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -628,13 +628,13 @@ def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
-def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
- "KANDN(B|D|Q|W)rr",
+def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)kk",
+ "KANDN(B|D|Q|W)kk",
"KMOV(B|D|Q|W)kk",
- "KNOT(B|D|Q|W)rr",
- "KOR(B|D|Q|W)rr",
- "KXNOR(B|D|Q|W)rr",
- "KXOR(B|D|Q|W)rr",
+ "KNOT(B|D|Q|W)kk",
+ "KOR(B|D|Q|W)kk",
+ "KXNOR(B|D|Q|W)kk",
+ "KXOR(B|D|Q|W)kk",
"KSET0(B|D|Q|W)", // Same as KXOR
"KSET1(B|D|Q|W)", // Same as KXNOR
"MMX_PADDS(B|W)rr",
@@ -646,10 +646,10 @@ def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"MMX_P(MAX|MIN)UBrr",
"MMX_PSUBS(B|W)rr",
"MMX_PSUBUS(B|W)rr",
- "VPMOVB2M(Z|Z128|Z256)rr",
- "VPMOVD2M(Z|Z128|Z256)rr",
- "VPMOVQ2M(Z|Z128|Z256)rr",
- "VPMOVW2M(Z|Z128|Z256)rr")>;
+ "VPMOVB2M(Z|Z128|Z256)kr",
+ "VPMOVD2M(Z|Z128|Z256)kr",
+ "VPMOVQ2M(Z|Z128|Z256)kr",
+ "VPMOVW2M(Z|Z128|Z256)kr")>;
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
let Latency = 1;
@@ -812,8 +812,8 @@ def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
- "KORTEST(B|D|Q|W)rr",
- "KTEST(B|D|Q|W)rr")>;
+ "KORTEST(B|D|Q|W)kk",
+ "KTEST(B|D|Q|W)kk")>;
def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let Latency = 3;
@@ -839,10 +839,10 @@ def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
let NumMicroOps = 1;
let ReleaseAtCycles = [1];
}
-def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
- "KSHIFTL(B|D|Q|W)ri",
- "KSHIFTR(B|D|Q|W)ri",
- "KUNPCK(BW|DQ|WD)rr",
+def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)kk",
+ "KSHIFTL(B|D|Q|W)ki",
+ "KSHIFTR(B|D|Q|W)ki",
+ "KUNPCK(BW|DQ|WD)kk",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
"VCMP(SD|SS)Zrr",
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index f82f9a88bb25e0..7f0d5115e93c3a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1081,18 +1081,18 @@ def Zn4WriteVecOpMask : SchedWriteRes<[Zn4FPOpMask01]> {
let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
-def : InstRW<[Zn4WriteVecOpMask], (instrs KADDBrr, KADDDrr, KADDQrr, KADDWrr,
- KANDBrr, KANDDrr, KANDQrr, KANDWrr,
- KANDNBrr, KANDNDrr, KANDNQrr, KANDNWrr,
+def : InstRW<[Zn4WriteVecOpMask], (instrs KADDBkk, KADDDkk, KADDQkk, KADDWkk,
+ KANDBkk, KANDDkk, KANDQkk, KANDWkk,
+ KANDNBkk, KANDNDkk, KANDNQkk, KANDNWkk,
KMOVBkk, KMOVDkk, KMOVQkk, KMOVWkk,
KMOVBrk, KMOVDrk, KMOVQrk, KMOVWrk,
- KNOTBrr, KNOTDrr, KNOTQrr, KNOTWrr,
- KORBrr, KORDrr, KORQrr, KORWrr,
- KORTESTBrr, KORTESTDrr, KORTESTQrr, KORTESTWrr,
- KTESTBrr, KTESTDrr, KTESTQrr, KTESTWrr,
- KUNPCKBWrr, KUNPCKDQrr, KUNPCKWDrr,
- KXNORBrr, KXNORDrr, KXNORQrr, KXNORWrr,
- KXORBrr, KXORDrr, KXORQrr, KXORWrr)>;
+ KNOTBkk, KNOTDkk, KNOTQkk, KNOTWkk,
+ KORBkk, KORDkk, KORQkk, KORWkk,
+ KORTESTBkk, KORTESTDkk, KORTESTQkk, KORTESTWkk,
+ KTESTBkk, KTESTDkk, KTESTQkk, KTESTWkk,
+ KUNPCKBWkk, KUNPCKDQkk, KUNPCKWDkk,
+ KXNORBkk, KXNORDkk, KXNORQkk, KXNORWkk,
+ KXORBkk, KXORDkk, KXORQkk, KXORWkk)>;
def Zn4WriteVecOpMaskMemMov : SchedWriteRes<[Zn4FPOpMask4]> {
let Latency = 1;
diff --git a/llvm/test/CodeGen/X86/apx/domain-reassignment.mir b/llvm/test/CodeGen/X86/apx/domain-reassignment.mir
index 7352aa2b307f9a..49af7a6c949a12 100644
--- a/llvm/test/CodeGen/X86/apx/domain-reassignment.mir
+++ b/llvm/test/CodeGen/X86/apx/domain-reassignment.mir
@@ -273,15 +273,15 @@ body: |
; CHECK: [[VCMPPDZrri:%[0-9]+]]:vk8 = VCMPPDZrri [[COPY3]], [[COPY4]], 0
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPDZrri]]
; CHECK: [[COPY6:%[0-9]+]]:vk8 = COPY [[COPY5]]
- ; CHECK: [[KSHIFTRBri:%[0-9]+]]:vk8 = KSHIFTRBri [[COPY6]], 2
- ; CHECK: [[KSHIFTLBri:%[0-9]+]]:vk8 = KSHIFTLBri [[KSHIFTRBri]], 1
- ; CHECK: [[KNOTBrr:%[0-9]+]]:vk8 = KNOTBrr [[KSHIFTLBri]]
- ; CHECK: [[KORBrr:%[0-9]+]]:vk8 = KORBrr [[KNOTBrr]], [[KSHIFTRBri]]
- ; CHECK: [[KANDBrr:%[0-9]+]]:vk8 = KANDBrr [[KORBrr]], [[KSHIFTLBri]]
- ; CHECK: [[KXORBrr:%[0-9]+]]:vk8 = KXORBrr [[KANDBrr]], [[KSHIFTRBri]]
- ; CHECK: [[KADDBrr:%[0-9]+]]:vk8 = KADDBrr [[KXORBrr]], [[KNOTBrr]]
+ ; CHECK: [[KSHIFTRBki:%[0-9]+]]:vk8 = KSHIFTRBki [[COPY6]], 2
+ ; CHECK: [[KSHIFTLBki:%[0-9]+]]:vk8 = KSHIFTLBki [[KSHIFTRBki]], 1
+ ; CHECK: [[KNOTBkk:%[0-9]+]]:vk8 = KNOTBkk [[KSHIFTLBki]]
+ ; CHECK: [[KORBkk:%[0-9]+]]:vk8 = KORBkk [[KNOTBkk]], [[KSHIFTRBki]]
+ ; CHECK: [[KANDBkk:%[0-9]+]]:vk8 = KANDBkk [[KORBkk]], [[KSHIFTLBki]]
+ ; CHECK: [[KXORBkk:%[0-9]+]]:vk8 = KXORBkk [[KANDBkk]], [[KSHIFTRBki]]
+ ; CHECK: [[KADDBkk:%[0-9]+]]:vk8 = KADDBkk [[KXORBkk]], [[KNOTBkk]]
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
- ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBrr]]
+ ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBkk]]
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]]
@@ -393,14 +393,14 @@ body: |
; CHECK: [[VCMPPSZrri:%[0-9]+]]:vk16 = VCMPPSZrri [[COPY3]], [[COPY4]], 0
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPSZrri]]
; CHECK: [[COPY6:%[0-9]+]]:vk16 = COPY [[COPY5]]
- ; CHECK: [[KSHIFTRWri:%[0-9]+]]:vk16 = KSHIFTRWri [[COPY6]], 2
- ; CHECK: [[KSHIFTLWri:%[0-9]+]]:vk16 = KSHIFTLWri [[KSHIFTRWri]], 1
- ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[KSHIFTLWri]]
- ; CHECK: [[KORWrr:%[0-9]+]]:vk16 = KORWrr [[KNOTWrr]], [[KSHIFTRWri]]
- ; CHECK: [[KANDWrr:%[0-9]+]]:vk16 = KANDWrr [[KORWrr]], [[KSHIFTLWri]]
- ; CHECK: [[KXORWrr:%[0-9]+]]:vk16 = KXORWrr [[KANDWrr]], [[KSHIFTRWri]]
+ ; CHECK: [[KSHIFTRWki:%[0-9]+]]:vk16 = KSHIFTRWki [[COPY6]], 2
+ ; CHECK: [[KSHIFTLWki:%[0-9]+]]:vk16 = KSHIFTLWki [[KSHIFTRWki]], 1
+ ; CHECK: [[KNOTWkk:%[0-9]+]]:vk16 = KNOTWkk [[KSHIFTLWki]]
+ ; CHECK: [[KORWkk:%[0-9]+]]:vk16 = KORWkk [[KNOTWkk]], [[KSHIFTRWki]]
+ ; CHECK: [[KANDWkk:%[0-9]+]]:vk16 = KANDWkk [[KORWkk]], [[KSHIFTLWki]]
+ ; CHECK: [[KXORWkk:%[0-9]+]]:vk16 = KXORWkk [[KANDWkk]], [[KSHIFTRWki]]
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
- ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWrr]]
+ ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWkk]]
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
@@ -501,15 +501,15 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVDkm:%[0-9]+]]:vk32 = KMOVDkm [[COPY]], 1, $noreg, 0, $noreg
- ; CHECK: [[KSHIFTRDri:%[0-9]+]]:vk32 = KSHIFTRDri [[KMOVDkm]], 2
- ; CHECK: [[KSHIFTLDri:%[0-9]+]]:vk32 = KSHIFTLDri [[KSHIFTRDri]], 1
- ; CHECK: [[KNOTDrr:%[0-9]+]]:vk32 = KNOTDrr [[KSHIFTLDri]]
- ; CHECK: [[KORDrr:%[0-9]+]]:vk32 = KORDrr [[KNOTDrr]], [[KSHIFTRDri]]
- ; CHECK: [[KANDDrr:%[0-9]+]]:vk32 = KANDDrr [[KORDrr]], [[KSHIFTLDri]]
- ; CHECK: [[KXORDrr:%[0-9]+]]:vk32 = KXORDrr [[KANDDrr]], [[KSHIFTRDri]]
- ; CHECK: [[KANDNDrr:%[0-9]+]]:vk32 = KANDNDrr [[KXORDrr]], [[KORDrr]]
- ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[KANDNDrr]], [[KXORDrr]]
- ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
+ ; CHECK: [[KSHIFTRDki:%[0-9]+]]:vk32 = KSHIFTRDki [[KMOVDkm]], 2
+ ; CHECK: [[KSHIFTLDki:%[0-9]+]]:vk32 = KSHIFTLDki [[KSHIFTRDki]], 1
+ ; CHECK: [[KNOTDkk:%[0-9]+]]:vk32 = KNOTDkk [[KSHIFTLDki]]
+ ; CHECK: [[KORDkk:%[0-9]+]]:vk32 = KORDkk [[KNOTDkk]], [[KSHIFTRDki]]
+ ; CHECK: [[KANDDkk:%[0-9]+]]:vk32 = KANDDkk [[KORDkk]], [[KSHIFTLDki]]
+ ; CHECK: [[KXORDkk:%[0-9]+]]:vk32 = KXORDkk [[KANDDkk]], [[KSHIFTRDki]]
+ ; CHECK: [[KANDNDkk:%[0-9]+]]:vk32 = KANDNDkk [[KXORDkk]], [[KORDkk]]
+ ; CHECK: [[KADDDkk:%[0-9]+]]:vk32 = KADDDkk [[KANDNDkk]], [[KXORDkk]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDkk]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
; CHECK: bb.1:
@@ -604,15 +604,15 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVQkm:%[0-9]+]]:vk64 = KMOVQkm [[COPY]], 1, $noreg, 0, $noreg
- ; CHECK: [[KSHIFTRQri:%[0-9]+]]:vk64 = KSHIFTRQri [[KMOVQkm]], 2
- ; CHECK: [[KSHIFTLQri:%[0-9]+]]:vk64 = KSHIFTLQri [[KSHIFTRQri]], 1
- ; CHECK: [[KNOTQrr:%[0-9]+]]:vk64 = KNOTQrr [[KSHIFTLQri]]
- ; CHECK: [[KORQrr:%[0-9]+]]:vk64 = KORQrr [[KNOTQrr]], [[KSHIFTRQri]]
- ; CHECK: [[KANDQrr:%[0-9]+]]:vk64 = KANDQrr [[KORQrr]], [[KSHIFTLQri]]
- ; CHECK: [[KXORQrr:%[0-9]+]]:vk64 = KXORQrr [[KANDQrr]], [[KSHIFTRQri]]
- ; CHECK: [[KANDNQrr:%[0-9]+]]:vk64 = KANDNQrr [[KXORQrr]], [[KORQrr]]
- ; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[KANDNQrr]], [[KXORQrr]]
- ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
+ ; CHECK: [[KSHIFTRQki:%[0-9]+]]:vk64 = KSHIFTRQki [[KMOVQkm]], 2
+ ; CHECK: [[KSHIFTLQki:%[0-9]+]]:vk64 = KSHIFTLQki [[KSHIFTRQki]], 1
+ ; CHECK: [[KNOTQkk:%[0-9]+]]:vk64 = KNOTQkk [[KSHIFTLQki]]
+ ; CHECK: [[KORQkk:%[0-9]+]]:vk64 = KORQkk [[KNOTQkk]], [[KSHIFTRQki]]
+ ; CHECK: [[KANDQkk:%[0-9]+]]:vk64 = KANDQkk [[KORQkk]], [[KSHIFTLQki]]
+ ; CHECK: [[KXORQkk:%[0-9]+]]:vk64 = KXORQkk [[KANDQkk]], [[KSHIFTRQki]]
+ ; CHECK: [[KANDNQkk:%[0-9]+]]:vk64 = KANDNQkk [[KXORQkk]], [[KORQkk]]
+ ; CHECK: [[KADDQkk:%[0-9]+]]:vk64 = KADDQkk [[KANDNQkk]], [[KXORQkk]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQkk]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
; CHECK: bb.1:
@@ -702,8 +702,8 @@ body: |
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY3:%[0-9]+]]:vk16 = COPY [[KMOVBkm]]
- ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[COPY3]]
- ; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWrr]]
+ ; CHECK: [[KNOTWkk:%[0-9]+]]:vk16 = KNOTWkk [[COPY3]]
+ ; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWkk]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY4]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
; CHECK: RET 0
@@ -774,8 +774,8 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk32 = COPY [[KMOVBkm]]
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY4:%[0-9]+]]:vk32 = COPY [[KMOVWkm]]
- ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[COPY3]], [[COPY4]]
- ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDrr]]
+ ; CHECK: [[KADDDkk:%[0-9]+]]:vk32 = KADDDkk [[COPY3]], [[COPY4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDkk]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
; CHECK: RET 0
@@ -847,8 +847,8 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk64 = COPY [[KMOVBkm]]
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY4:%[0-9]+]]:vk64 = COPY [[KMOVWkm]]
- ; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[COPY3]], [[COPY4]]
- ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
+ ; CHECK: [[KADDQkk:%[0-9]+]]:vk64 = KADDQkk [[COPY3]], [[COPY4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQkk]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
; CHECK: RET 0
diff --git a/llvm/test/CodeGen/X86/domain-reassignment.mir b/llvm/test/CodeGen/X86/domain-reassignment.mir
index dcd435619990cf..fc0f32c7d4a947 100644
--- a/llvm/test/CodeGen/X86/domain-reassignment.mir
+++ b/llvm/test/CodeGen/X86/domain-reassignment.mir
@@ -273,15 +273,15 @@ body: |
; CHECK: [[VCMPPDZrri:%[0-9]+]]:vk8 = VCMPPDZrri [[COPY3]], [[COPY4]], 0
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPDZrri]]
; CHECK: [[COPY6:%[0-9]+]]:vk8 = COPY [[COPY5]]
- ; CHECK: [[KSHIFTRBri:%[0-9]+]]:vk8 = KSHIFTRBri [[COPY6]], 2
- ; CHECK: [[KSHIFTLBri:%[0-9]+]]:vk8 = KSHIFTLBri [[KSHIFTRBri]], 1
- ; CHECK: [[KNOTBrr:%[0-9]+]]:vk8 = KNOTBrr [[KSHIFTLBri]]
- ; CHECK: [[KORBrr:%[0-9]+]]:vk8 = KORBrr [[KNOTBrr]], [[KSHIFTRBri]]
- ; CHECK: [[KANDBrr:%[0-9]+]]:vk8 = KANDBrr [[KORBrr]], [[KSHIFTLBri]]
- ; CHECK: [[KXORBrr:%[0-9]+]]:vk8 = KXORBrr [[KANDBrr]], [[KSHIFTRBri]]
- ; CHECK: [[KADDBrr:%[0-9]+]]:vk8 = KADDBrr [[KXORBrr]], [[KNOTBrr]]
+ ; CHECK: [[KSHIFTRBki:%[0-9]+]]:vk8 = KSHIFTRBki [[COPY6]], 2
+ ; CHECK: [[KSHIFTLBki:%[0-9]+]]:vk8 = KSHIFTLBki [[KSHIFTRBki]], 1
+ ; CHECK: [[KNOTBkk:%[0-9]+]]:vk8 = KNOTBkk [[KSHIFTLBki]]
+ ; CHECK: [[KORBkk:%[0-9]+]]:vk8 = KORBkk [[KNOTBkk]], [[KSHIFTRBki]]
+ ; CHECK: [[KANDBkk:%[0-9]+]]:vk8 = KANDBkk [[KORBkk]], [[KSHIFTLBki]]
+ ; CHECK: [[KXORBkk:%[0-9]+]]:vk8 = KXORBkk [[KANDBkk]], [[KSHIFTRBki]]
+ ; CHECK: [[KADDBkk:%[0-9]+]]:vk8 = KADDBkk [[KXORBkk]], [[KNOTBkk]]
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
- ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBrr]]
+ ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBkk]]
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]]
@@ -393,14 +393,14 @@ body: |
; CHECK: [[VCMPPSZrri:%[0-9]+]]:vk16 = VCMPPSZrri [[COPY3]], [[COPY4]], 0
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPSZrri]]
; CHECK: [[COPY6:%[0-9]+]]:vk16 = COPY [[COPY5]]
- ; CHECK: [[KSHIFTRWri:%[0-9]+]]:vk16 = KSHIFTRWri [[COPY6]], 2
- ; CHECK: [[KSHIFTLWri:%[0-9]+]]:vk16 = KSHIFTLWri [[KSHIFTRWri]], 1
- ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[KSHIFTLWri]]
- ; CHECK: [[KORWrr:%[0-9]+]]:vk16 = KORWrr [[KNOTWrr]], [[KSHIFTRWri]]
- ; CHECK: [[KANDWrr:%[0-9]+]]:vk16 = KANDWrr [[KORWrr]], [[KSHIFTLWri]]
- ; CHECK: [[KXORWrr:%[0-9]+]]:vk16 = KXORWrr [[KANDWrr]], [[KSHIFTRWri]]
+ ; CHECK: [[KSHIFTRWki:%[0-9]+]]:vk16 = KSHIFTRWki [[COPY6]], 2
+ ; CHECK: [[KSHIFTLWki:%[0-9]+]]:vk16 = KSHIFTLWki [[KSHIFTRWki]], 1
+ ; CHECK: [[KNOTWkk:%[0-9]+]]:vk16 = KNOTWkk [[KSHIFTLWki]]
+ ; CHECK: [[KORWkk:%[0-9]+]]:vk16 = KORWkk [[KNOTWkk]], [[KSHIFTRWki]]
+ ; CHECK: [[KANDWkk:%[0-9]+]]:vk16 = KANDWkk [[KORWkk]], [[KSHIFTLWki]]
+ ; CHECK: [[KXORWkk:%[0-9]+]]:vk16 = KXORWkk [[KANDWkk]], [[KSHIFTRWki]]
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
- ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWrr]]
+ ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWkk]]
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
@@ -501,15 +501,15 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVDkm:%[0-9]+]]:vk32 = KMOVDkm [[COPY]], 1, $noreg, 0, $noreg
- ; CHECK: [[KSHIFTRDri:%[0-9]+]]:vk32 = KSHIFTRDri [[KMOVDkm]], 2
- ; CHECK: [[KSHIFTLDri:%[0-9]+]]:vk32 = KSHIFTLDri [[KSHIFTRDri]], 1
- ; CHECK: [[KNOTDrr:%[0-9]+]]:vk32 = KNOTDrr [[KSHIFTLDri]]
- ; CHECK: [[KORDrr:%[0-9]+]]:vk32 = KORDrr [[KNOTDrr]], [[KSHIFTRDri]]
- ; CHECK: [[KANDDrr:%[0-9]+]]:vk32 = KANDDrr [[KORDrr]], [[KSHIFTLDri]]
- ; CHECK: [[KXORDrr:%[0-9]+]]:vk32 = KXORDrr [[KANDDrr]], [[KSHIFTRDri]]
- ; CHECK: [[KANDNDrr:%[0-9]+]]:vk32 = KANDNDrr [[KXORDrr]], [[KORDrr]]
- ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[KANDNDrr]], [[KXORDrr]]
- ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
+ ; CHECK: [[KSHIFTRDki:%[0-9]+]]:vk32 = KSHIFTRDki [[KMOVDkm]], 2
+ ; CHECK: [[KSHIFTLDki:%[0-9]+]]:vk32 = KSHIFTLDki [[KSHIFTRDki]], 1
+ ; CHECK: [[KNOTDkk:%[0-9]+]]:vk32 = KNOTDkk [[KSHIFTLDki]]
+ ; CHECK: [[KORDkk:%[0-9]+]]:vk32 = KORDkk [[KNOTDkk]], [[KSHIFTRDki]]
+ ; CHECK: [[KANDDkk:%[0-9]+]]:vk32 = KANDDkk [[KORDkk]], [[KSHIFTLDki]]
+ ; CHECK: [[KXORDkk:%[0-9]+]]:vk32 = KXORDkk [[KANDDkk]], [[KSHIFTRDki]]
+ ; CHECK: [[KANDNDkk:%[0-9]+]]:vk32 = KANDNDkk [[KXORDkk]], [[KORDkk]]
+ ; CHECK: [[KADDDkk:%[0-9]+]]:vk32 = KADDDkk [[KANDNDkk]], [[KXORDkk]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDkk]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
; CHECK: bb.1:
@@ -604,15 +604,15 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVQkm:%[0-9]+]]:vk64 = KMOVQkm [[COPY]], 1, $noreg, 0, $noreg
- ; CHECK: [[KSHIFTRQri:%[0-9]+]]:vk64 = KSHIFTRQri [[KMOVQkm]], 2
- ; CHECK: [[KSHIFTLQri:%[0-9]+]]:vk64 = KSHIFTLQri [[KSHIFTRQri]], 1
- ; CHECK: [[KNOTQrr:%[0-9]+]]:vk64 = KNOTQrr [[KSHIFTLQri]]
- ; CHECK: [[KORQrr:%[0-9]+]]:vk64 = KORQrr [[KNOTQrr]], [[KSHIFTRQri]]
- ; CHECK: [[KANDQrr:%[0-9]+]]:vk64 = KANDQrr [[KORQrr]], [[KSHIFTLQri]]
- ; CHECK: [[KXORQrr:%[0-9]+]]:vk64 = KXORQrr [[KANDQrr]], [[KSHIFTRQri]]
- ; CHECK: [[KANDNQrr:%[0-9]+]]:vk64 = KANDNQrr [[KXORQrr]], [[KORQrr]]
- ; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[KANDNQrr]], [[KXORQrr]]
- ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
+ ; CHECK: [[KSHIFTRQki:%[0-9]+]]:vk64 = KSHIFTRQki [[KMOVQkm]], 2
+ ; CHECK: [[KSHIFTLQki:%[0-9]+]]:vk64 = KSHIFTLQki [[KSHIFTRQki]], 1
+ ; CHECK: [[KNOTQkk:%[0-9]+]]:vk64 = KNOTQkk [[KSHIFTLQki]]
+ ; CHECK: [[KORQkk:%[0-9]+]]:vk64 = KORQkk [[KNOTQkk]], [[KSHIFTRQki]]
+ ; CHECK: [[KANDQkk:%[0-9]+]]:vk64 = KANDQkk [[KORQkk]], [[KSHIFTLQki]]
+ ; CHECK: [[KXORQkk:%[0-9]+]]:vk64 = KXORQkk [[KANDQkk]], [[KSHIFTRQki]]
+ ; CHECK: [[KANDNQkk:%[0-9]+]]:vk64 = KANDNQkk [[KXORQkk]], [[KORQkk]]
+ ; CHECK: [[KADDQkk:%[0-9]+]]:vk64 = KADDQkk [[KANDNQkk]], [[KXORQkk]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQkk]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
; CHECK: bb.1:
@@ -702,8 +702,8 @@ body: |
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY3:%[0-9]+]]:vk16 = COPY [[KMOVBkm]]
- ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[COPY3]]
- ; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWrr]]
+ ; CHECK: [[KNOTWkk:%[0-9]+]]:vk16 = KNOTWkk [[COPY3]]
+ ; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWkk]]
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY4]], [[COPY1]]
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
; CHECK: RET 0
@@ -774,8 +774,8 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk32 = COPY [[KMOVBkm]]
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY4:%[0-9]+]]:vk32 = COPY [[KMOVWkm]]
- ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[COPY3]], [[COPY4]]
- ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDrr]]
+ ; CHECK: [[KADDDkk:%[0-9]+]]:vk32 = KADDDkk [[COPY3]], [[COPY4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDkk]]
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
; CHECK: RET 0
@@ -847,8 +847,8 @@ body: |
; CHECK: [[COPY3:%[0-9]+]]:vk64 = COPY [[KMOVBkm]]
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
; CHECK: [[COPY4:%[0-9]+]]:vk64 = COPY [[KMOVWkm]]
- ; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[COPY3]], [[COPY4]]
- ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
+ ; CHECK: [[KADDQkk:%[0-9]+]]:vk64 = KADDQkk [[COPY3]], [[COPY4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQkk]]
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
; CHECK: RET 0
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index 2a557ac9b97b3a..b944712e4863d7 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -9,9 +9,9 @@ define void @_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
- ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
+ ; CHECK-NEXT: [[VPMOVW2MZ128kr:%[0-9]+]]:vk16wm = VPMOVW2MZ128kr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
- ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
+ ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128kr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
; CHECK-NEXT: RET 0
entry:
%1 = trunc <8 x i32> %gepload to <8 x i16>
@@ -28,9 +28,9 @@ define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
- ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
+ ; CHECK-NEXT: [[VPMOVW2MZ128kr:%[0-9]+]]:vk16wm = VPMOVW2MZ128kr killed [[VPSLLWZ128ri]]
; CHECK-NEXT: [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
- ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
+ ; CHECK-NEXT: VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128kr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
; CHECK-NEXT: RET 0
entry:
%1 = trunc <8 x i32> %gepload to <8 x i16>
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
index b364625a1e6f15..a153def4a71a6c 100644
--- a/llvm/test/CodeGen/X86/masked_expandload_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -9,8 +9,8 @@ define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
- ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
- ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
+ ; CHECK-NEXT: [[VPMOVW2MZ128kr:%[0-9]+]]:vk16wm = VPMOVW2MZ128kr killed [[VPSLLWZ128ri]]
+ ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128kr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
; CHECK-NEXT: RET 0, $xmm0
entry:
@@ -26,8 +26,8 @@ define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
; CHECK-NEXT: [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
- ; CHECK-NEXT: [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
- ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+ ; CHECK-NEXT: [[VPMOVW2MZ128kr:%[0-9]+]]:vk16wm = VPMOVW2MZ128kr killed [[VPSLLWZ128ri]]
+ ; CHECK-NEXT: [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128kr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
; CHECK-NEXT: $xmm0 = COPY [[VPEXPANDWZ128rmk]]
; CHECK-NEXT: RET 0, $xmm0
entry: