[llvm] [X86] Consistently use 'k' for predicate mask registers in instruction names (PR #108780)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 15:22:46 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes:
We use 'k' for move instructions and to indicate masked variants of EVEX instructions, but otherwise we're very inconsistent about when we use 'k' vs 'r' in instruction names.
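For instance, two representative renames taken from the patch below (operand suffix letters: 'k' = predicate mask register, 'r' = GPR/vector register, 'i' = immediate):

```cpp
// From the X86DomainReassignment changes: GPR ops are mapped to mask ops
// whose suffixes now name the actual operand kinds.
createReplacer(X86::AND16rr, X86::KANDWkk);    // was X86::KANDWrr
createReplacer(X86::SHR16ri, X86::KSHIFTRWki); // was X86::KSHIFTRWri
```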
---
Patch is 61.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108780.diff
13 Files Affected:
- (modified) llvm/lib/Target/X86/X86DomainReassignment.cpp (+62-62)
- (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+13-13)
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+23-23)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+23-23)
- (modified) llvm/lib/Target/X86/X86InstrVecCompiler.td (+15-15)
- (modified) llvm/lib/Target/X86/X86SchedIceLake.td (+16-16)
- (modified) llvm/lib/Target/X86/X86SchedSapphireRapids.td (+13-13)
- (modified) llvm/lib/Target/X86/X86SchedSkylakeServer.td (+16-16)
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+10-10)
- (modified) llvm/test/CodeGen/X86/apx/domain-reassignment.mir (+39-39)
- (modified) llvm/test/CodeGen/X86/domain-reassignment.mir (+39-39)
- (modified) llvm/test/CodeGen/X86/masked_compressstore_isel.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/masked_expandload_isel.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 831944cce3afdd..4823183113989a 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -636,21 +636,21 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
- createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
- createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
- createReplacer(X86::NOT16r, X86::KNOTWrr);
- createReplacer(X86::OR16rr, X86::KORWrr);
- createReplacer(X86::AND16rr, X86::KANDWrr);
- createReplacer(X86::XOR16rr, X86::KXORWrr);
+ createReplacer(X86::SHR16ri, X86::KSHIFTRWki);
+ createReplacer(X86::SHL16ri, X86::KSHIFTLWki);
+ createReplacer(X86::NOT16r, X86::KNOTWkk);
+ createReplacer(X86::OR16rr, X86::KORWkk);
+ createReplacer(X86::AND16rr, X86::KANDWkk);
+ createReplacer(X86::XOR16rr, X86::KXORWkk);
bool HasNDD = STI->hasNDD();
if (HasNDD) {
- createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWri);
- createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWri);
- createReplacer(X86::NOT16r_ND, X86::KNOTWrr);
- createReplacer(X86::OR16rr_ND, X86::KORWrr);
- createReplacer(X86::AND16rr_ND, X86::KANDWrr);
- createReplacer(X86::XOR16rr_ND, X86::KXORWrr);
+ createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWki);
+ createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWki);
+ createReplacer(X86::NOT16r_ND, X86::KNOTWkk);
+ createReplacer(X86::OR16rr_ND, X86::KORWkk);
+ createReplacer(X86::AND16rr_ND, X86::KANDWkk);
+ createReplacer(X86::XOR16rr_ND, X86::KXORWkk);
}
if (STI->hasBWI()) {
@@ -663,86 +663,86 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));
- createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
- createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
+ createReplacer(X86::SHR32ri, X86::KSHIFTRDki);
+ createReplacer(X86::SHR64ri, X86::KSHIFTRQki);
- createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
- createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
+ createReplacer(X86::SHL32ri, X86::KSHIFTLDki);
+ createReplacer(X86::SHL64ri, X86::KSHIFTLQki);
- createReplacer(X86::ADD32rr, X86::KADDDrr);
- createReplacer(X86::ADD64rr, X86::KADDQrr);
+ createReplacer(X86::ADD32rr, X86::KADDDkk);
+ createReplacer(X86::ADD64rr, X86::KADDQkk);
- createReplacer(X86::NOT32r, X86::KNOTDrr);
- createReplacer(X86::NOT64r, X86::KNOTQrr);
+ createReplacer(X86::NOT32r, X86::KNOTDkk);
+ createReplacer(X86::NOT64r, X86::KNOTQkk);
- createReplacer(X86::OR32rr, X86::KORDrr);
- createReplacer(X86::OR64rr, X86::KORQrr);
+ createReplacer(X86::OR32rr, X86::KORDkk);
+ createReplacer(X86::OR64rr, X86::KORQkk);
- createReplacer(X86::AND32rr, X86::KANDDrr);
- createReplacer(X86::AND64rr, X86::KANDQrr);
+ createReplacer(X86::AND32rr, X86::KANDDkk);
+ createReplacer(X86::AND64rr, X86::KANDQkk);
- createReplacer(X86::ANDN32rr, X86::KANDNDrr);
- createReplacer(X86::ANDN64rr, X86::KANDNQrr);
+ createReplacer(X86::ANDN32rr, X86::KANDNDkk);
+ createReplacer(X86::ANDN64rr, X86::KANDNQkk);
- createReplacer(X86::XOR32rr, X86::KXORDrr);
- createReplacer(X86::XOR64rr, X86::KXORQrr);
+ createReplacer(X86::XOR32rr, X86::KXORDkk);
+ createReplacer(X86::XOR64rr, X86::KXORQkk);
if (HasNDD) {
- createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDri);
- createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDri);
- createReplacer(X86::ADD32rr_ND, X86::KADDDrr);
- createReplacer(X86::NOT32r_ND, X86::KNOTDrr);
- createReplacer(X86::OR32rr_ND, X86::KORDrr);
- createReplacer(X86::AND32rr_ND, X86::KANDDrr);
- createReplacer(X86::XOR32rr_ND, X86::KXORDrr);
- createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQri);
- createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQri);
- createReplacer(X86::ADD64rr_ND, X86::KADDQrr);
- createReplacer(X86::NOT64r_ND, X86::KNOTQrr);
- createReplacer(X86::OR64rr_ND, X86::KORQrr);
- createReplacer(X86::AND64rr_ND, X86::KANDQrr);
- createReplacer(X86::XOR64rr_ND, X86::KXORQrr);
+ createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDki);
+ createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDki);
+ createReplacer(X86::ADD32rr_ND, X86::KADDDkk);
+ createReplacer(X86::NOT32r_ND, X86::KNOTDkk);
+ createReplacer(X86::OR32rr_ND, X86::KORDkk);
+ createReplacer(X86::AND32rr_ND, X86::KANDDkk);
+ createReplacer(X86::XOR32rr_ND, X86::KXORDkk);
+ createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQki);
+ createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQki);
+ createReplacer(X86::ADD64rr_ND, X86::KADDQkk);
+ createReplacer(X86::NOT64r_ND, X86::KNOTQkk);
+ createReplacer(X86::OR64rr_ND, X86::KORQkk);
+ createReplacer(X86::AND64rr_ND, X86::KANDQkk);
+ createReplacer(X86::XOR64rr_ND, X86::KXORQkk);
}
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
- // createReplacer(X86::TEST32rr, X86::KTESTDrr);
- // createReplacer(X86::TEST64rr, X86::KTESTQrr);
+ // createReplacer(X86::TEST32rr, X86::KTESTDkk);
+ // createReplacer(X86::TEST64rr, X86::KTESTQkk);
}
if (STI->hasDQI()) {
- createReplacer(X86::ADD8rr, X86::KADDBrr);
- createReplacer(X86::ADD16rr, X86::KADDWrr);
+ createReplacer(X86::ADD8rr, X86::KADDBkk);
+ createReplacer(X86::ADD16rr, X86::KADDWkk);
- createReplacer(X86::AND8rr, X86::KANDBrr);
+ createReplacer(X86::AND8rr, X86::KANDBkk);
createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
- createReplacer(X86::NOT8r, X86::KNOTBrr);
+ createReplacer(X86::NOT8r, X86::KNOTBkk);
- createReplacer(X86::OR8rr, X86::KORBrr);
+ createReplacer(X86::OR8rr, X86::KORBkk);
- createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
- createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
+ createReplacer(X86::SHR8ri, X86::KSHIFTRBki);
+ createReplacer(X86::SHL8ri, X86::KSHIFTLBki);
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
- // createReplacer(X86::TEST8rr, X86::KTESTBrr);
- // createReplacer(X86::TEST16rr, X86::KTESTWrr);
+ // createReplacer(X86::TEST8rr, X86::KTESTBkk);
+ // createReplacer(X86::TEST16rr, X86::KTESTWkk);
- createReplacer(X86::XOR8rr, X86::KXORBrr);
+ createReplacer(X86::XOR8rr, X86::KXORBkk);
if (HasNDD) {
- createReplacer(X86::ADD8rr_ND, X86::KADDBrr);
- createReplacer(X86::ADD16rr_ND, X86::KADDWrr);
- createReplacer(X86::AND8rr_ND, X86::KANDBrr);
- createReplacer(X86::NOT8r_ND, X86::KNOTBrr);
- createReplacer(X86::OR8rr_ND, X86::KORBrr);
- createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBri);
- createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBri);
- createReplacer(X86::XOR8rr_ND, X86::KXORBrr);
+ createReplacer(X86::ADD8rr_ND, X86::KADDBkk);
+ createReplacer(X86::ADD16rr_ND, X86::KADDWkk);
+ createReplacer(X86::AND8rr_ND, X86::KANDBkk);
+ createReplacer(X86::NOT8r_ND, X86::KNOTBkk);
+ createReplacer(X86::OR8rr_ND, X86::KORBkk);
+ createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBki);
+ createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBki);
+ createReplacer(X86::XOR8rr_ND, X86::KXORBkk);
}
}
#undef GET_EGPR_IF_ENABLED
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d0a54ab8993c26..e8a9da1f17d962 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1647,10 +1647,10 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
// used. We're doing this late so we can prefer to fold the AND into masked
// comparisons. Doing that can be better for the live range of the mask
// register.
- case X86::KORTESTBrr:
- case X86::KORTESTWrr:
- case X86::KORTESTDrr:
- case X86::KORTESTQrr: {
+ case X86::KORTESTBkk:
+ case X86::KORTESTWkk:
+ case X86::KORTESTDkk:
+ case X86::KORTESTQkk: {
SDValue Op0 = N->getOperand(0);
if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
!Op0.isMachineOpcode() || !onlyUsesZeroFlag(SDValue(N, 0)))
@@ -1661,10 +1661,10 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
switch (Op0.getMachineOpcode()) {
default:
continue;
- CASE(KANDBrr)
- CASE(KANDWrr)
- CASE(KANDDrr)
- CASE(KANDQrr)
+ CASE(KANDBkk)
+ CASE(KANDWkk)
+ CASE(KANDDkk)
+ CASE(KANDQkk)
}
unsigned NewOpc;
#define FROM_TO(A, B) \
@@ -1672,14 +1672,14 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
NewOpc = X86::B; \
break;
switch (Opc) {
- FROM_TO(KORTESTBrr, KTESTBrr)
- FROM_TO(KORTESTWrr, KTESTWrr)
- FROM_TO(KORTESTDrr, KTESTDrr)
- FROM_TO(KORTESTQrr, KTESTQrr)
+ FROM_TO(KORTESTBkk, KTESTBkk)
+ FROM_TO(KORTESTWkk, KTESTWkk)
+ FROM_TO(KORTESTDkk, KTESTDkk)
+ FROM_TO(KORTESTQkk, KTESTQkk)
}
// KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
// KAND instructions and KTEST use the same ISA feature.
- if (NewOpc == X86::KTESTWrr && !Subtarget->hasDQI())
+ if (NewOpc == X86::KTESTWkk && !Subtarget->hasDQI())
continue;
#undef FROM_TO
MachineSDNode *KTest = CurDAG->getMachineNode(
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b6bf34a8a0d31c..9ed59803c1f9d9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2782,7 +2782,7 @@ multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
X86FoldableSchedWrite sched, Predicate prd> {
let Predicates = [prd] in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+ def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>,
Sched<[sched]>;
@@ -2807,14 +2807,14 @@ defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
def : Pat<(vnot VK4:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
+ (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -2823,7 +2823,7 @@ multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, Predicate prd,
bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
+ def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
@@ -2877,11 +2877,11 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode,
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
-defm : avx512_binop_pat<and, KANDWrr>;
-defm : avx512_binop_pat<vandn, KANDNWrr>;
-defm : avx512_binop_pat<or, KORWrr>;
-defm : avx512_binop_pat<vxnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, KXORWrr>;
+defm : avx512_binop_pat<and, KANDWkk>;
+defm : avx512_binop_pat<vandn, KANDNWkk>;
+defm : avx512_binop_pat<or, KORWkk>;
+defm : avx512_binop_pat<vxnor, KXNORWkk>;
+defm : avx512_binop_pat<xor, KXORWkk>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
@@ -2889,13 +2889,13 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
- def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
+ def kk : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
(ins Src.KRC:$src1, Src.KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX, VVVV, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
- (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
+ (!cast<Instruction>(NAME#kk) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
@@ -2908,7 +2908,7 @@ multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched,
Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
- def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
+ def kk : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
Sched<[sched]>;
@@ -2935,7 +2935,7 @@ defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512] in
- def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
+ def ki : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
@@ -3463,12 +3463,12 @@ def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
(v8i64 VR512:$src))),
- (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
- (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
+ (VMOVDQA32Zrrkz (KNOTWkk VK16WM:$mask), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
@@ -10425,7 +10425,7 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
-def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+def rk : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[Sched]>;
@@ -10448,7 +10448,7 @@ defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
- def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
+ def kr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
EVEX, Sched<[WriteMove]>;
@@ -10461,7 +10461,7 @@ multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
(_.KVT (COPY_TO_REGCLASS
- (!cast<Instruction>(Name#"Zrr")
+ (!cast<Instruction>(Name#"Zkr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
@@ -10499,14 +10499,14 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
- (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ (VPMOVDBZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
- (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ (VPMOVDWZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
}
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
- (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rk VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a74da000af0cee..57a894b09e0445 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6323,17 +6323,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
case X86::KSET0W:
- return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORWkk), X86::K0);
case X86::KSET0D:
- return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORDkk), X86::K0);
case X86::KSET0Q:
- return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXORQkk), X86::K0);
case X86::KSET1W:
- return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORWkk), X86::K0);
case X86::KSET1D:
- return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORDkk), X86::K0);
case X86::KSET1Q:
- return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
+ return Expand2AddrKreg(MIB, get(X86::KXNORQkk), X86::K0);
case TargetOpcode::LOAD_STACK_GUARD:
expandLoadStackGuard(MIB, *this);
return true;
@@ -9805,22 +9805,22 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
case X86::VXORPSZ256rr:
case X86::VXORPDZrr:
case X86::VXORPSZrr:
- case X86::KADDBrr:
- case X86::KADDWrr:
- case X86::KADDDrr:
- case X86::KADDQrr:
- case X86::KANDBrr:
- case X86::KANDWrr:
- case X86::KANDDrr:
- case X86::KANDQrr:
- case X86::KORBrr:
- case X86::KORWrr:
- case X86::KORDrr:
- case X86::KORQrr:
- case X86::KXORBrr:
- case X86::KXORWrr:
- case X86::KXORDrr:
- case X86::KXORQrr:
+ case X86::KADDBkk:
+ case X86::KADDWkk:
+ case X86::KADDDkk:
+ case X86::KADDQkk:
+ case X86::KANDBkk:
+ cas...
[truncated]
``````````
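For context, the X86ISelDAGToDAG.cpp hunk renames the opcodes used by the late KORTEST-to-KTEST fold; a simplified sketch of that mapping (illustrative, not a verbatim excerpt from the patch):

```cpp
// When KORTEST's two operands are the same KAND result and only the Z flag
// is consumed, the pair can be rewritten as a single KTEST of KAND's inputs.
switch (Opc) {
case X86::KORTESTBkk: NewOpc = X86::KTESTBkk; break;
case X86::KORTESTWkk: NewOpc = X86::KTESTWkk; break; // KTESTW needs AVX512DQ
case X86::KORTESTDkk: NewOpc = X86::KTESTDkk; break;
case X86::KORTESTQkk: NewOpc = X86::KTESTQkk; break;
}
```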
https://github.com/llvm/llvm-project/pull/108780
More information about the llvm-commits mailing list