[llvm-branch-commits] [llvm] 68d6fe5 - [X86][CodeGen] Prefer KMOVkk_EVEX over KMOVkk when EGPR is supported (#74048)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 4 11:02:40 PST 2023
Author: Shengchen Kan
Date: 2023-12-02T22:43:02+08:00
New Revision: 68d6fe508ce1fb2a70220975036d267f564fe54d
URL: https://github.com/llvm/llvm-project/commit/68d6fe508ce1fb2a70220975036d267f564fe54d
DIFF: https://github.com/llvm/llvm-project/commit/68d6fe508ce1fb2a70220975036d267f564fe54d.diff
LOG: [X86][CodeGen] Prefer KMOVkk_EVEX over KMOVkk when EGPR is supported (#74048)
In the memory fold table, we have
```
{X86::KMOVDkk, X86::KMOVDkm, 0},
{X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0}
```
where `KMOVDkm_EVEX` can use EGPR as base and index registers, while
`KMOVDkm` cannot. Hence, even though the `KMOVkk` register-to-register forms
have no GPR operands, we prefer `KMOVDkk_EVEX` so that memory folding can
help register allocation. If folding does not happen, it is compressed back
to `KMOVDkk` by the EVEX2VEX pass.
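For illustration, here is a minimal sketch of the selection rule the patch applies at each call site below. The helper name `selectKMovKK` is hypothetical; the `X86::KMOV*kk`/`*kk_EVEX` opcodes and `hasEGPR()` are the ones used in the diff.
```
// Hypothetical helper sketching the rule used in this patch: when EGPR
// (APX extended GPRs) is available, pick the EVEX-encoded k-to-k move so
// that a later memory fold can use an extended GPR as base/index register;
// otherwise keep the legacy/VEX-encoded form.
static unsigned selectKMovKK(const X86Subtarget &STI, unsigned Width) {
  bool HasEGPR = STI.hasEGPR();
  switch (Width) {
  case 8:  return HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk;
  case 16: return HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk;
  case 32: return HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk;
  case 64: return HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk;
  default: llvm_unreachable("unexpected mask width");
  }
}
```
The actual patch inlines this ternary at each `createReplacer`/`createReplacerDstCOPY` call site and in `copyPhysReg`, as shown below.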
Added:
llvm/test/CodeGen/X86/apx/kmov-kk.ll
Modified:
llvm/lib/Target/X86/X86DomainReassignment.cpp
llvm/lib/Target/X86/X86InstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index be7e8db95b98e..bdd86e48fa543 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -625,8 +625,10 @@ void X86DomainReassignment::initConverters() {
createReplacerDstCOPY(X86::MOVZX64rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
- createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX32rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX64rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
if (STI->hasDQI()) {
createReplacerDstCOPY(X86::MOVZX16rm8,
@@ -636,9 +638,12 @@ void X86DomainReassignment::initConverters() {
createReplacerDstCOPY(X86::MOVZX64rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX16rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX32rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX64rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
}
auto createReplacer = [&](unsigned From, unsigned To) {
@@ -647,7 +652,7 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
- createReplacer(X86::MOV16rr, X86::KMOVWkk);
+ createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -662,8 +667,8 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
- createReplacer(X86::MOV32rr, X86::KMOVDkk);
- createReplacer(X86::MOV64rr, X86::KMOVQkk);
+ createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
+ createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -703,7 +708,7 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
- createReplacer(X86::MOV8rr, X86::KMOVBkk);
+ createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacer(X86::NOT8r, X86::KNOTBrr);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 583f8ec73a036..ea3bf1f101c1e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4070,6 +4070,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasVLX = Subtarget.hasVLX();
+ bool HasEGPR = Subtarget.hasEGPR();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -4124,7 +4125,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// All KMASK RegClasses hold the same k registers, can be tested against
// anyone.
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
+ Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+ : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
diff --git a/llvm/test/CodeGen/X86/apx/kmov-kk.ll b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
new file mode 100644
index 0000000000000..639a35f4546fc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+
+define <16 x i32> @kmovkk(ptr %base, <16 x i32> %ind, i16 %mask) {
+; EGPR: kmovq %k1, %k2 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0xd1]
+ %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+ %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+ %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+ %res = add <16 x i32> %gt1, %gt2
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)