[llvm] [X86][MC][CodeGen] Support EGPR for KMOV (PR #73781)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 00:05:01 PST 2023
https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/73781
From 5adafcbcf092bb85774ef556ea2bffa24c47b352 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Wed, 29 Nov 2023 19:16:06 +0800
Subject: [PATCH 1/3] [X86] Support EGPR for KMOV
KMOV is essential for copies between k-registers and GPRs.
After EGPR was introduced in #70958, KMOV needs to be extended to support
these new registers.
TAG: CPU2017 builds successfully with the egpr feature enabled.
---
.../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 3 +-
llvm/lib/Target/X86/X86DomainReassignment.cpp | 32 +++---
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 13 ++-
llvm/lib/Target/X86/X86InstrAVX512.td | 70 +++++++-----
llvm/lib/Target/X86/X86InstrInfo.cpp | 33 ++++--
llvm/lib/Target/X86/X86InstrInfo.td | 2 +
.../apx/kmov-copy-to-from-asymmetric-reg.ll | 14 +++
.../CodeGen/X86/apx/kmov-domain-assignment.ll | 58 ++++++++++
llvm/test/CodeGen/X86/apx/kmov-isel.ll | 103 ++++++++++++++++++
.../CodeGen/X86/apx/kmov-postrapseudos.ll | 90 +++++++++++++++
llvm/test/MC/Disassembler/X86/apx/kmov.txt | 82 ++++++++++++++
llvm/test/MC/X86/apx/kmov-att.s | 69 ++++++++++++
llvm/test/MC/X86/apx/kmov-intel.s | 66 +++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 4 +
14 files changed, 589 insertions(+), 50 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
create mode 100644 llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
create mode 100644 llvm/test/CodeGen/X86/apx/kmov-isel.ll
create mode 100644 llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
create mode 100644 llvm/test/MC/Disassembler/X86/apx/kmov.txt
create mode 100644 llvm/test/MC/X86/apx/kmov-att.s
create mode 100644 llvm/test/MC/X86/apx/kmov-intel.s
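
A note for reviewers skimming the patch: the APX extended GPRs (r16-r31)
can only be encoded with the EVEX prefix, so each VEX-encoded KMOV gains a
parallel *_EVEX opcode, and every KMOV emission site selects between the
two based on the subtarget. A minimal sketch of the pattern repeated
throughout (the helper is illustrative only; the patch inlines the ternary
at each use site):

    // Illustrative helper, not part of the patch: pick the EVEX twin of a
    // KMOV load when extended GPRs may appear in the address operands.
    static unsigned getKMOVWLoadOpcode(const X86Subtarget &STI) {
      return STI.hasEGPR() ? X86::KMOVWkm_EVEX : X86::KMOVWkm;
    }
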
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index aadbc3845b79c18..cab2f0a2e1c1a2b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
O << "\t{vex2}";
else if (Flags & X86::IP_USE_VEX3)
O << "\t{vex3}";
- else if (Flags & X86::IP_USE_EVEX)
+ else if ((Flags & X86::IP_USE_EVEX) ||
+ (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
O << "\t{evex}";
if (Flags & X86::IP_USE_DISP8)
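
The printer change above makes any instruction whose TSFlags carry
ExplicitEVEXPrefix print the {evex} pseudo-prefix, in addition to the
existing per-instruction IP_USE_EVEX flag. This matters for the new k-to-k
*_EVEX forms: their assembly text is otherwise identical to the VEX forms,
so without the pseudo-prefix the output would not round-trip back to the
EVEX encoding. (GPR and memory forms that actually use r16-r31 are
unambiguous and need no prefix, which is why the disassembler tests below
show {evex} only on register-to-register k-moves.) A condensed restatement
of the new condition, assuming the surrounding variables:

    // Condensed view of the check (illustrative):
    bool PrintEVEX =
        (Flags & X86::IP_USE_EVEX) ||              // user-requested {evex}
        (TSFlags & X86II::ExplicitOpPrefixMask) ==
            X86II::ExplicitEVEXPrefix;             // EVEX-only variant
    if (PrintEVEX)
      O << "\t{evex}";
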
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index fa8d5c752a3d273..be7e8db95b98ed8 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};
- createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+ bool HasEGPR = STI->hasEGPR();
+ createReplacerDstCOPY(X86::MOVZX32rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacerDstCOPY(X86::MOVZX64rm16,
+ HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
if (STI->hasDQI()) {
- createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX16rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX32rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacerDstCOPY(X86::MOVZX64rm8,
+ HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};
- createReplacer(X86::MOV16rm, X86::KMOVWkm);
- createReplacer(X86::MOV16mr, X86::KMOVWmk);
+ createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+ createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
createReplacer(X86::MOV16rr, X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);
if (STI->hasBWI()) {
- createReplacer(X86::MOV32rm, X86::KMOVDkm);
- createReplacer(X86::MOV64rm, X86::KMOVQkm);
+ createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+ createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
- createReplacer(X86::MOV32mr, X86::KMOVDmk);
- createReplacer(X86::MOV64mr, X86::KMOVQmk);
+ createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
+ createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
createReplacer(X86::MOV32rr, X86::KMOVDkk);
createReplacer(X86::MOV64rr, X86::KMOVQkk);
@@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::AND8rr, X86::KANDBrr);
- createReplacer(X86::MOV8rm, X86::KMOVBkm);
- createReplacer(X86::MOV8mr, X86::KMOVBmk);
+ createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+ createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
createReplacer(X86::MOV8rr, X86::KMOVBkk);
createReplacer(X86::NOT8r, X86::KNOTBrr);
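
Note the asymmetry in the converters above: only the load/store (km/mk)
replacements switch to the _EVEX opcodes, because their memory operands may
use an extended GPR as base or index register. The register-to-register kk
forms have no GPR operands, so the shorter VEX encoding remains correct
even with EGPR enabled:

    // Illustrative: memory forms may reference an EGPR base/index ...
    createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
    // ... but k-to-k moves never touch a GPR, so VEX stays valid.
    createReplacer(X86::MOV16rr, X86::KMOVWkk);
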
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 085fa9280b0eaee..29da95459544785 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -466,10 +466,15 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
- .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+ bool HasEGPR = STI->hasEGPR();
+ auto MIBLo =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHi =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+ .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(1 + i));
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b5dac7a0c65afc8..f325f47d46464c3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
- string OpcodeStr, RegisterClass KRC,
- ValueType vvt, X86MemOperand x86memop> {
+ string OpcodeStr, RegisterClass KRC, ValueType vvt,
+ X86MemOperand x86memop, string Suffix = ""> {
+ let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
- def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (load addr:$src)))]>,
- Sched<[WriteLoad]>;
- def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store KRC:$src, addr:$dst)]>,
- Sched<[WriteStore]>;
+ def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set KRC:$dst, (vvt (load addr:$src)))]>,
+ Sched<[WriteLoad]>;
+ def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store KRC:$src, addr:$dst)]>,
+ Sched<[WriteStore]>;
+ }
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
- string OpcodeStr,
- RegisterClass KRC, RegisterClass GRC> {
- let hasSideEffects = 0 in {
- def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
- def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
- Sched<[WriteMove]>;
+ string OpcodeStr, RegisterClass KRC,
+ RegisterClass GRC, string Suffix = ""> {
+ let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
+ def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
+ def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ Sched<[WriteMove]>;
}
}
-let Predicates = [HasDQI] in
+let Predicates = [HasDQI, NoEGPR] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
+let Predicates = [HasDQI, HasEGPR, In64BitMode] in
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
+ EVEX, PD;
-let Predicates = [HasAVX512] in
+let Predicates = [HasAVX512, NoEGPR] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
+let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
+ EVEX, PS;
-let Predicates = [HasBWI] in {
+let Predicates = [HasBWI, NoEGPR] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, REX_W;
}
+let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
+ EVEX, PD, REX_W;
+ defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
+ EVEX, XD;
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
+ EVEX, PS, REX_W;
+ defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
+ EVEX, XD, REX_W;
+}
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
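
The Suffix parameter threaded through both multiclasses stamps out a second
family of records named KMOV*kk_EVEX, KMOV*km_EVEX, and so on, with
explicitOpPrefix set to ExplicitEVEX so the encoder and printer force the
EVEX form. The predicate lists keep the two families disjoint, roughly
(illustrative C++ comment view of the TableGen output):

    // Illustrative: parallel opcodes produced by the multiclasses.
    //   X86::KMOVWkm       VEX,  Predicates = [HasAVX512, NoEGPR]
    //   X86::KMOVWkm_EVEX  EVEX, Predicates = [HasAVX512, HasEGPR,
    //                                          In64BitMode]
    // In64BitMode is required because extended GPRs do not exist outside
    // 64-bit mode.
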
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d41c5323020d3cb..99256bec113d22a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8rm:
case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
MemBytes = 1;
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
case X86::VMOVSHZrm:
case X86::VMOVSHZrm_alt:
MemBytes = 2;
@@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSZrm:
case X86::VMOVSSZrm_alt:
case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
MemBytes = 4;
return true;
case X86::MOV64rm:
@@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSrm:
@@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8mr:
case X86::KMOVBmk:
+ case X86::KMOVBmk_EVEX:
MemBytes = 1;
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
+ case X86::KMOVWmk_EVEX:
case X86::VMOVSHZmr:
MemBytes = 2;
return true;
@@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSmr:
case X86::VMOVSSZmr:
case X86::KMOVDmk:
+ case X86::KMOVDmk_EVEX:
MemBytes = 4;
return true;
case X86::MOV64mr:
@@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
case X86::KMOVQmk:
+ case X86::KMOVQmk_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSmr:
@@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
const X86Subtarget &Subtarget) {
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasEGPR = Subtarget.hasEGPR();
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
@@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQrk;
+ return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
- return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
+ : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
}
// SrcReg(GR64) -> DestReg(MaskReg)
@@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
assert(Subtarget.hasBWI());
- return X86::KMOVQkr;
+ return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
- return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+ return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
+ : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
}
@@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
+ bool HasEGPR = STI.hasEGPR();
assert(RC != nullptr && "Invalid target register class");
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::MOV8rm : X86::MOV8mr;
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
- return Load ? X86::KMOVWkm : X86::KMOVWmk;
+ return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
+ : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return Load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
if (X86::VK32RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVD requires BWI");
- return Load ? X86::KMOVDkm : X86::KMOVDmk;
+ return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
+ : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
}
// All of these mask pair classes have the same spill size, the same kind
// of kmov instructions can be used with all of them.
@@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVQ requires BWI");
- return Load ? X86::KMOVQkm : X86::KMOVQmk;
+ return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
+ : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
}
llvm_unreachable("Unknown 8-byte regclass");
case 10:
@@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
case X86::KMOVBkm:
+ case X86::KMOVBkm_EVEX:
case X86::KMOVWkm:
+ case X86::KMOVWkm_EVEX:
case X86::KMOVDkm:
+ case X86::KMOVDkm_EVEX:
case X86::KMOVQkm:
+ case X86::KMOVQkm_EVEX:
return true;
}
};
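
The X86InstrInfo.cpp changes are all of one shape: every switch or query
that recognizes a KMOV opcode (frame load/store detection,
asymmetric-register copies, spill/reload opcode selection, same-base-pointer
load analysis) must treat the _EVEX twin identically, otherwise spilling and
folding silently degrade under +egpr. For example, a frame-load check now
matches both spellings (simplified excerpt, illustrative):

    // Illustrative: both encodings of the same operation share one path.
    case X86::KMOVWkm:
    case X86::KMOVWkm_EVEX:
      MemBytes = 2;
      return true;
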
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 9046b6af463acf9..07c05d6eb3f844b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -878,6 +878,8 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
diff --git a/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
new file mode 100644
index 000000000000000..747b288ec2f0328
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+
+define void @kmov(i1 %cmp23.not) {
+; CHECK-LABEL: kmov:
+; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+;
+; EGPR-LABEL: kmov:
+; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+entry:
+ %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
+ store double %0, ptr null, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
new file mode 100644
index 000000000000000..b09a14cee957427
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-domain-reassignment -mattr=+avx512f,+avx512bw,+avx512dq,+egpr | FileCheck %s
+
+define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
+ ; CHECK-LABEL: name: test_fcmp_storei1
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $edi, $rdx, $xmm0, $xmm1, $xmm2, $xmm3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32x = COPY $xmm3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fr32x = COPY $xmm2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fr32x = COPY $xmm1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fr32x = COPY $xmm0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rdx
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr8 = COPY [[COPY5]].sub_8bit
+ ; CHECK-NEXT: TEST8ri killed [[COPY6]], 1, implicit-def $eflags
+ ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.if:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[VCMPSSZrr:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY3]], [[COPY2]], 0, implicit $mxcsr
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vk32 = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vk8 = COPY [[COPY8]]
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.else:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[VCMPSSZrr1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY1]], [[COPY]], 0, implicit $mxcsr
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr1]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vk32 = COPY [[COPY10]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.exit:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY9]], %bb.1
+ ; CHECK-NEXT: KMOVBmk_EVEX [[COPY4]], 1, $noreg, 0, $noreg, [[PHI]]
+ ; CHECK-NEXT: RET 0
+entry:
+ br i1 %cond, label %if, label %else
+
+if:
+ %cmp1 = fcmp oeq float %f1, %f2
+ br label %exit
+
+else:
+ %cmp2 = fcmp oeq float %f3, %f4
+ br label %exit
+
+exit:
+ %val = phi i1 [%cmp1, %if], [%cmp2, %else]
+ store i1 %val, ptr %iptr
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/apx/kmov-isel.ll b/llvm/test/CodeGen/X86/apx/kmov-isel.ll
new file mode 100644
index 000000000000000..882269b4982613a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-isel.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr --show-mc-encoding | FileCheck --check-prefix=AVX512 %s
+
+define void @bitcast_16i8_store(ptr %p, <16 x i8> %a0) {
+; AVX512-LABEL: bitcast_16i8_store:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT: kmovw %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x91,0x07]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %a1 = icmp slt <16 x i8> %a0, zeroinitializer
+ %a2 = bitcast <16 x i1> %a1 to i16
+ store i16 %a2, ptr %p
+ ret void
+}
+
+define void @bitcast_32i8_store(ptr %p, <32 x i8> %a0) {
+; AVX512-LABEL: bitcast_32i8_store:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT: kmovd %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x91,0x07]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %a1 = icmp slt <32 x i8> %a0, zeroinitializer
+ %a2 = bitcast <32 x i1> %a1 to i32
+ store i32 %a2, ptr %p
+ ret void
+}
+
+define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
+; AVX512-LABEL: bitcast_64i8_store:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT: kmovq %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x91,0x07]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %a1 = icmp slt <64 x i8> %a0, zeroinitializer
+ %a2 = bitcast <64 x i1> %a1 to i64
+ store i64 %a2, ptr %p
+ ret void
+}
+
+define <16 x i1> @bitcast_16i8_load(ptr %p, <16 x i1> %a, <16 x i1> %b) {
+; AVX512-LABEL: bitcast_16i8_load:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsllw $7, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x71,0xf1,0x07]
+; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x07]
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT: kmovw (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x90,0x0f]
+; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT: kandnw %k0, %k1, %k0 # encoding: [0xc5,0xf4,0x42,0xc0]
+; AVX512-NEXT: korw %k0, %k2, %k0 # encoding: [0xc5,0xec,0x45,0xc0]
+; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %mask = load i16, ptr %p
+ %vmask = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %vmask, <16 x i1> %a, <16 x i1> %b
+ ret <16 x i1> %res
+}
+
+define <32 x i1> @bitcast_32i8_load(ptr %p, <32 x i1> %a, <32 x i1> %b) {
+; AVX512-LABEL: bitcast_32i8_load:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsllw $7, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x71,0xf1,0x07]
+; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT: kmovd (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x90,0x0f]
+; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT: kandnd %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf5,0x42,0xc0]
+; AVX512-NEXT: kord %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x45,0xc0]
+; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %mask = load i32, ptr %p
+ %vmask = bitcast i32 %mask to <32 x i1>
+ %res = select <32 x i1> %vmask, <32 x i1> %a, <32 x i1> %b
+ ret <32 x i1> %res
+}
+
+define <64 x i1> @bitcast_64i8_load(ptr %p, <64 x i1> %a, <64 x i1> %b) {
+; AVX512-LABEL: bitcast_64i8_load:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsllw $7, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xf1,0x07]
+; AVX512-NEXT: vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT: kmovq (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0x0f]
+; AVX512-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT: kandnq %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf4,0x42,0xc0]
+; AVX512-NEXT: korq %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xec,0x45,0xc0]
+; AVX512-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT: retq # encoding: [0xc3]
+ %mask = load i64, ptr %p
+ %vmask = bitcast i64 %mask to <64 x i1>
+ %res = select <64 x i1> %vmask, <64 x i1> %a, <64 x i1> %b
+ ret <64 x i1> %res
+}
diff --git a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
new file mode 100644
index 000000000000000..7e1ce02ed8e89d9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512 %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512BW %s
+
+define void @kmovkr_1(i1 %cmp23.not) {
+; AVX512-LABEL: kmovkr_1:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
+; AVX512-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512-NEXT: retq # encoding: [0xc3]
+;
+; AVX512BW-LABEL: kmovkr_1:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: kmovd %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xcf]
+; AVX512BW-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
+; AVX512BW-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512BW-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512BW-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
+ store double %0, ptr null, align 8
+ ret void
+}
+
+define void @kmovkr_2() {
+; AVX512-LABEL: kmovkr_2:
+; AVX512: # %bb.0: # %alloca_21
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512-NEXT: movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
+; AVX512-NEXT: kmovw %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xc8]
+; AVX512-NEXT: vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+;
+; AVX512BW-LABEL: kmovkr_2:
+; AVX512BW: # %bb.0: # %alloca_21
+; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512BW-NEXT: movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
+; AVX512BW-NEXT: kmovd %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xc8]
+; AVX512BW-NEXT: vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512BW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BW-NEXT: retq # encoding: [0xc3]
+alloca_21:
+ call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr null, i32 1, <4 x i1> <i1 true, i1 true, i1 false, i1 false>)
+ ret void
+}
+
+define i32 @kmovrk_1(<4 x ptr> %arg) {
+; AVX512-LABEL: kmovrk_1:
+; AVX512: # %bb.0: # %bb
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; AVX512-NEXT: kmovw %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x93,0xc0]
+; AVX512-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
+; AVX512-NEXT: jne .LBB2_1 # encoding: [0x75,A]
+; AVX512-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; AVX512-NEXT: # %bb.2: # %bb3
+; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: retq # encoding: [0xc3]
+; AVX512-NEXT: .LBB2_1: # %bb2
+;
+; AVX512BW-LABEL: kmovrk_1:
+; AVX512BW: # %bb.0: # %bb
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; AVX512BW-NEXT: kmovd %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX512BW-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
+; AVX512BW-NEXT: jne .LBB2_1 # encoding: [0x75,A]
+; AVX512BW-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; AVX512BW-NEXT: # %bb.2: # %bb3
+; AVX512BW-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512BW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BW-NEXT: retq # encoding: [0xc3]
+; AVX512BW-NEXT: .LBB2_1: # %bb2
+bb:
+ %icmp = icmp ne <4 x ptr> %arg, zeroinitializer
+ %freeze = freeze <4 x i1> %icmp
+ %bitcast = bitcast <4 x i1> %freeze to i4
+ %icmp1 = icmp ne i4 %bitcast, 0
+ br i1 %icmp1, label %bb2, label %bb3
+bb2:
+ unreachable
+bb3:
+ ret i32 0
+}
+
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr nocapture, i32 immarg, <4 x i1>)
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
new file mode 100644
index 000000000000000..d089ef192230a54
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -0,0 +1,82 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT: kmovb %r16d, %k1
+# INTEL: kmovb k1, r16d
+0x62,0xf9,0x7d,0x08,0x92,0xc8
+
+# ATT: kmovw %r16d, %k1
+# INTEL: kmovw k1, r16d
+0x62,0xf9,0x7c,0x08,0x92,0xc8
+
+# ATT: kmovd %r16d, %k1
+# INTEL: kmovd k1, r16d
+0x62,0xf9,0x7f,0x08,0x92,0xc8
+
+# ATT: kmovq %r16, %k1
+# INTEL: kmovq k1, r16
+0x62,0xf9,0xff,0x08,0x92,0xc8
+
+# ATT: kmovb %k1, %r16d
+# INTEL: kmovb r16d, k1
+0x62,0xe1,0x7d,0x08,0x93,0xc1
+
+# ATT: kmovw %k1, %r16d
+# INTEL: kmovw r16d, k1
+0x62,0xe1,0x7c,0x08,0x93,0xc1
+
+# ATT: kmovd %k1, %r16d
+# INTEL: kmovd r16d, k1
+0x62,0xe1,0x7f,0x08,0x93,0xc1
+
+# ATT: kmovq %k1, %r16
+# INTEL: kmovq r16, k1
+0x62,0xe1,0xff,0x08,0x93,0xc1
+
+# ATT: kmovb (%r16,%r17), %k1
+# INTEL: kmovb k1, byte ptr [r16 + r17]
+0x62,0xf9,0x79,0x08,0x90,0x0c,0x08
+
+# ATT: kmovw (%r16,%r17), %k1
+# INTEL: kmovw k1, word ptr [r16 + r17]
+0x62,0xf9,0x78,0x08,0x90,0x0c,0x08
+
+# ATT: kmovd (%r16,%r17), %k1
+# INTEL: kmovd k1, dword ptr [r16 + r17]
+0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08
+
+# ATT: kmovq (%r16,%r17), %k1
+# INTEL: kmovq k1, qword ptr [r16 + r17]
+0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08
+
+# ATT: kmovb %k1, (%r16,%r17)
+# INTEL: kmovb byte ptr [r16 + r17], k1
+0x62,0xf9,0x79,0x08,0x91,0x0c,0x08
+
+# ATT: kmovw %k1, (%r16,%r17)
+# INTEL: kmovw word ptr [r16 + r17], k1
+0x62,0xf9,0x78,0x08,0x91,0x0c,0x08
+
+# ATT: kmovd %k1, (%r16,%r17)
+# INTEL: kmovd dword ptr [r16 + r17], k1
+0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08
+
+# ATT: kmovq %k1, (%r16,%r17)
+# INTEL: kmovq qword ptr [r16 + r17], k1
+0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08
+
+# ATT: {evex} kmovb %k1, %k2
+# INTEL: {evex} kmovb k2, k1
+0x62,0xf1,0x7d,0x08,0x90,0xd1
+
+# ATT: {evex} kmovw %k1, %k2
+# INTEL: {evex} kmovw k2, k1
+0x62,0xf1,0x7c,0x08,0x90,0xd1
+
+# ATT: {evex} kmovd %k1, %k2
+# INTEL: {evex} kmovd k2, k1
+0x62,0xf1,0xfd,0x08,0x90,0xd1
+
+# ATT: {evex} kmovq %k1, %k2
+# INTEL: {evex} kmovq k2, k1
+0x62,0xf1,0xfc,0x08,0x90,0xd1
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
new file mode 100644
index 000000000000000..be5042cf0a30c8f
--- /dev/null
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-20: error:
+# ERROR-NOT: error:
+# CHECK: kmovb %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
+ kmovb %r16d, %k1
+# CHECK: kmovw %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
+ kmovw %r16d, %k1
+# CHECK: kmovd %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
+ kmovd %r16d, %k1
+# CHECK: kmovq %r16, %k1
+# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
+ kmovq %r16, %k1
+
+# CHECK: kmovb %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
+ kmovb %k1, %r16d
+# CHECK: kmovw %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
+ kmovw %k1, %r16d
+# CHECK: kmovd %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
+ kmovd %k1, %r16d
+# CHECK: kmovq %k1, %r16
+# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
+ kmovq %k1, %r16
+
+# CHECK: kmovb (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
+ kmovb (%r16,%r17), %k1
+# CHECK: kmovw (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
+ kmovw (%r16,%r17), %k1
+# CHECK: kmovd (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
+ kmovd (%r16,%r17), %k1
+# CHECK: kmovq (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
+ kmovq (%r16,%r17), %k1
+
+# CHECK: kmovb %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
+ kmovb %k1, (%r16,%r17)
+# CHECK: kmovw %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
+ kmovw %k1, (%r16,%r17)
+# CHECK: kmovd %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
+ kmovd %k1, (%r16,%r17)
+# CHECK: kmovq %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
+ kmovq %k1, (%r16,%r17)
+
+# CHECK: {evex} kmovb %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
+ {evex} kmovb %k1, %k2
+# CHECK: {evex} kmovw %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
+ {evex} kmovw %k1, %k2
+# CHECK: {evex} kmovd %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
+ {evex} kmovd %k1, %k2
+# CHECK: {evex} kmovq %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
+ {evex} kmovq %k1, %k2
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s b/llvm/test/MC/X86/apx/kmov-intel.s
new file mode 100644
index 000000000000000..8ceb29d32dba6c4
--- /dev/null
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -0,0 +1,66 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: kmovb k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
+ kmovb k1, r16d
+# CHECK: kmovw k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
+ kmovw k1, r16d
+# CHECK: kmovd k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
+ kmovd k1, r16d
+# CHECK: kmovq k1, r16
+# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
+ kmovq k1, r16
+
+# CHECK: kmovb r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
+ kmovb r16d, k1
+# CHECK: kmovw r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
+ kmovw r16d, k1
+# CHECK: kmovd r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
+ kmovd r16d, k1
+# CHECK: kmovq r16, k1
+# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
+ kmovq r16, k1
+
+# CHECK: kmovb k1, byte ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
+ kmovb k1, byte ptr [r16 + r17]
+# CHECK: kmovw k1, word ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
+ kmovw k1, word ptr [r16 + r17]
+# CHECK: kmovd k1, dword ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
+ kmovd k1, dword ptr [r16 + r17]
+# CHECK: kmovq k1, qword ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
+ kmovq k1, qword ptr [r16 + r17]
+
+# CHECK: kmovb byte ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
+ kmovb byte ptr [r16 + r17], k1
+# CHECK: kmovw word ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
+ kmovw word ptr [r16 + r17], k1
+# CHECK: kmovd dword ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
+ kmovd dword ptr [r16 + r17], k1
+# CHECK: kmovq qword ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
+ kmovq qword ptr [r16 + r17], k1
+
+# CHECK: {evex} kmovb k2, k1
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
+ {evex} kmovb k2, k1
+# CHECK: {evex} kmovw k2, k1
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
+ {evex} kmovw k2, k1
+# CHECK: {evex} kmovd k2, k1
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
+ {evex} kmovd k2, k1
+# CHECK: {evex} kmovq k2, k1
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
+ {evex} kmovq k2, k1
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index d4515161ee0858a..a0d35655ae4e60f 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -487,9 +487,13 @@ static const X86FoldTableEntry Table1[] = {
{X86::IMUL64rri32, X86::IMUL64rmi32, 0},
{X86::IMUL64rri8, X86::IMUL64rmi8, 0},
{X86::KMOVBkk, X86::KMOVBkm, TB_NO_REVERSE},
+ {X86::KMOVBkk_EVEX, X86::KMOVBkm_EVEX, TB_NO_REVERSE},
{X86::KMOVDkk, X86::KMOVDkm, 0},
+ {X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0},
{X86::KMOVQkk, X86::KMOVQkm, 0},
+ {X86::KMOVQkk_EVEX, X86::KMOVQkm_EVEX, 0},
{X86::KMOVWkk, X86::KMOVWkm, 0},
+ {X86::KMOVWkk_EVEX, X86::KMOVWkm_EVEX, 0},
{X86::LWPINS32rri, X86::LWPINS32rmi, 0},
{X86::LWPINS64rri, X86::LWPINS64rmi, 0},
{X86::LWPVAL32rri, X86::LWPVAL32rmi, 0},
From 71a8834efbcccfcdbcb74cadcc79990e551a2237 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Thu, 30 Nov 2023 14:42:02 +0800
Subject: [PATCH 2/3] Add the missing KMOVWmk case and reformat predicate definitions
---
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 6 ++++--
llvm/lib/Target/X86/X86InstrInfo.td | 5 ++---
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 29da95459544785..4f1971efb435c07 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -505,8 +505,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
- auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+ auto MIBLo = BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
+ auto MIBHi = BuildMI(MBB, MBBI, DL,
+ TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(i));
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 07c05d6eb3f844b..9ec09ac3d28e2e1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -878,11 +878,10 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;
-def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
-def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
-
def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
def HasMMX : Predicate<"Subtarget->hasMMX()">;
def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
From db590e0230e7ef9cb947a504fcb99d3d33010482 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Thu, 30 Nov 2023 16:04:11 +0800
Subject: [PATCH 3/3] Hoist the HasEGPR query so both pseudo expansions can use it
---
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4f1971efb435c07..ecc7208e760722b 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -264,6 +264,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
const DebugLoc &DL = MBBI->getDebugLoc();
+ bool HasEGPR = STI->hasEGPR();
switch (Opcode) {
default:
return false;
@@ -466,7 +467,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
- bool HasEGPR = STI->hasEGPR();
auto MIBLo =
BuildMI(MBB, MBBI, DL,
TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))