[llvm] [X86] Support EGPR for KMOV (PR #73781)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 03:26:18 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-mc
Author: Shengchen Kan (KanRobert)
Changes:
KMOV is essential for copying between mask (k-) registers and GPRs. Now that EGPR has been introduced in #70958, KMOV should be extended to support the new registers.

TAG: CPU2017 builds successfully with the egpr feature enabled.
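For context: the existing KMOV forms are VEX-encoded, and VEX cannot address the APX extended GPRs (r16-r31), so the patch adds EVEX-encoded twins (suffixed `_EVEX`) and selects them whenever the subtarget has EGPR. A minimal, self-contained sketch of that selection pattern (plain C++; the enum here is a hypothetical stand-in that only mirrors the pseudo-opcode names added below, not the actual LLVM API):

```cpp
#include <cstdio>

// Hypothetical stand-ins for the X86:: pseudo-opcodes added in this patch.
enum Opcode { KMOVWkm, KMOVWkm_EVEX };

// The pattern repeated at every KMOV creation site in the diff: prefer the
// EVEX-encoded variant when the subtarget has EGPR, since only EVEX can
// encode the extended registers r16-r31.
static Opcode selectKMOVWLoad(bool HasEGPR) {
  return HasEGPR ? KMOVWkm_EVEX : KMOVWkm;
}

int main() {
  printf("%d\n", selectKMOVWLoad(/*HasEGPR=*/true) == KMOVWkm_EVEX); // prints 1
  return 0;
}
```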
---
Patch is 39.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73781.diff
14 Files Affected:
- (modified) llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp (+2-1)
- (modified) llvm/lib/Target/X86/X86DomainReassignment.cpp (+19-13)
- (modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+9-4)
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+45-25)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+26-7)
- (modified) llvm/lib/Target/X86/X86InstrInfo.td (+2)
- (added) llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll (+14)
- (added) llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll (+58)
- (added) llvm/test/CodeGen/X86/apx/kmov-isel.ll (+103)
- (added) llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll (+90)
- (added) llvm/test/MC/Disassembler/X86/apx/kmov.txt (+82)
- (added) llvm/test/MC/X86/apx/kmov-att.s (+69)
- (added) llvm/test/MC/X86/apx/kmov-intel.s (+66)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+4)
``````````diff
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index aadbc3845b79c18..cab2f0a2e1c1a2b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
O << "\t{vex2}";
else if (Flags & X86::IP_USE_VEX3)
O << "\t{vex3}";
- else if (Flags & X86::IP_USE_EVEX)
+ else if ((Flags & X86::IP_USE_EVEX) ||
+ (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
O << "\t{evex}";
if (Flags & X86::IP_USE_DISP8)
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index fa8d5c752a3d273..be7e8db95b98ed8 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};
- createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+ bool HasEGPR = STI->hasEGPR();
+ createReplacerDstCOPY(X86::MOVZX32rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacerDstCOPY(X86::MOVZX64rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
if (STI->hasDQI()) {
- createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX16rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX32rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX64rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};
- createReplacer(X86::MOV16rm, X86::KMOVWkm);
- createReplacer(X86::MOV16mr, X86::KMOVWmk);
+ createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
createReplacer(X86::MOV16rr, X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);
if (STI->hasBWI()) {
- createReplacer(X86::MOV32rm, X86::KMOVDkm);
- createReplacer(X86::MOV64rm, X86::KMOVQkm);
+ createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+ createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
- createReplacer(X86::MOV32mr, X86::KMOVDmk);
- createReplacer(X86::MOV64mr, X86::KMOVQmk);
+ createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
+ createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
createReplacer(X86::MOV32rr, X86::KMOVDkk);
createReplacer(X86::MOV64rr, X86::KMOVQkk);
@@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::AND8rr, X86::KANDBrr);
- createReplacer(X86::MOV8rm, X86::KMOVBkm);
- createReplacer(X86::MOV8mr, X86::KMOVBmk);
+ createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
createReplacer(X86::MOV8rr, X86::KMOVBkk);
createReplacer(X86::NOT8r, X86::KNOTBrr);
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 085fa9280b0eaee..29da95459544785 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -466,10 +466,15 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+ bool HasEGPR = STI->hasEGPR();
+ auto MIBLo =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHi =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(1 + i));
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b5dac7a0c65afc8..f325f47d46464c3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
- string OpcodeStr, RegisterClass KRC,
- ValueType vvt, X86MemOperand x86memop> {
+ string OpcodeStr, RegisterClass KRC, ValueType vvt,
+ X86MemOperand x86memop, string Suffix = ""> {
+ let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
- def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (load addr:$src)))]>,
- Sched<[WriteLoad]>;
- def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store KRC:$src, addr:$dst)]>,
- Sched<[WriteStore]>;
+ def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set KRC:$dst, (vvt (load addr:$src)))]>,
+ Sched<[WriteLoad]>;
+ def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store KRC:$src, addr:$dst)]>,
+ Sched<[WriteStore]>;
+ }
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
- string OpcodeStr,
- RegisterClass KRC, RegisterClass GRC> {
- let hasSideEffects = 0 in {
- def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
+ string OpcodeStr, RegisterClass KRC,
+ RegisterClass GRC, string Suffix = ""> {
+ let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
+ def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
}
}
-let Predicates = [HasDQI] in
+let Predicates = [HasDQI, NoEGPR] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
+let Predicates = [HasDQI, HasEGPR, In64BitMode] in
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
+ EVEX, PD;
-let Predicates = [HasAVX512] in
+let Predicates = [HasAVX512, NoEGPR] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
+let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
+ EVEX, PS;
-let Predicates = [HasBWI] in {
+let Predicates = [HasBWI, NoEGPR] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, REX_W;
}
+let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
+ EVEX, PD, REX_W;
+ defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
+ EVEX, XD;
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
+ EVEX, PS, REX_W;
+ defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
+ EVEX, XD, REX_W;
+}
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d41c5323020d3cb..99256bec113d22a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8rm:
case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
MemBytes = 1;
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
case X86::VMOVSHZrm:
case X86::VMOVSHZrm_alt:
MemBytes = 2;
@@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSZrm:
case X86::VMOVSSZrm_alt:
case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
MemBytes = 4;
return true;
case X86::MOV64rm:
@@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSrm:
@@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8mr:
case X86::KMOVBmk:
+ case X86::KMOVBmk_EVEX:
MemBytes = 1;
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
+ case X86::KMOVWmk_EVEX:
case X86::VMOVSHZmr:
MemBytes = 2;
return true;
@@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSmr:
case X86::VMOVSSZmr:
case X86::KMOVDmk:
+ case X86::KMOVDmk_EVEX:
MemBytes = 4;
return true;
case X86::MOV64mr:
@@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
case X86::KMOVQmk:
+ case X86::KMOVQmk_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSmr:
@@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
const X86Subtarget &Subtarget) {
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasEGPR = Subtarget.hasEGPR();
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
@@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQrk;
+ return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
- return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
+ : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
}
// SrcReg(GR64) -> DestReg(MaskReg)
@@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQkr;
+ return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
- return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
+ : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
}
@@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
+ bool HasEGPR = STI.hasEGPR();
assert(RC != nullptr && "Invalid target register class");
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::MOV8rm : X86::MOV8mr;
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
- return Load ? X86::KMOVWkm : X86::KMOVWmk;
+ return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
+ : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return Load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
if (X86::VK32RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVD requires BWI");
- return Load ? X86::KMOVDkm : X86::KMOVDmk;
+ return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
+ : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
}
// All of these mask pair classes have the same spill size, the same kind
// of kmov instructions can be used with all of them.
@@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVQ requires BWI");
- return Load ? X86::KMOVQkm : X86::KMOVQmk;
+ return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
+ : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
}
llvm_unreachable("Unknown 8-byte regclass");
case 10:
@@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
return true;
}
};
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 9046b6af463acf9..07c05d6eb3f844b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -878,6 +878,8 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
diff --git a/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
new file mode 100644
index 000000000000000..747b288ec2f0328
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+
+define void @kmov(i1 %cmp23.not) {
+; CHECK-LABEL: kmov:
+; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+;
+; EGPR-LABEL: kmov:
+; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+entry:
+ %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
+ store double %0, ptr null, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
new file mode 100644
index 000000000000000..b09a14cee957427
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-domain-reassignment -mattr=+avx512f,+avx512bw,+avx512dq,+egpr | FileCheck %s
+
+define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
+ ; CHECK-LABEL: name: test_fcmp_storei1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $edi, $rdx, $xmm0, $xmm1, $xmm2, $xmm3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32x = COPY $xmm3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fr32x = COPY $xmm2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fr32x = COPY $xmm1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fr32x = COPY $xmm0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rdx
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr8 = COPY [[COPY5]].sub_8bit
+ ; CHECK-NEXT: TEST8ri killed [[COPY6]], 1, implicit-def $eflags
+ ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.if:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[VCMPSSZrr:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY3]], [[COPY2]], 0, implicit $mxcsr
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vk32 = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vk8 = COPY [[COPY8]]
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.else:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[VCMPSSZrr1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY1]], [[COPY]], 0, implicit $mxcsr
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr1]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vk32 = COPY [[COPY10]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY9]], %bb.1
+ ; CHECK-NEXT: KMOVBmk_EVEX [[COPY4]], 1, $noreg, 0, $noreg, [[PHI]]
+ ; CHECK-NEXT: RET 0
+entry:
+ br i1 %cond, label %if, label %else
+
+if:
+ %cmp1 = fcmp oeq float %f1, %f2
+ ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/73781
More information about the llvm-commits mailing list