[llvm] [X86][CodeGen] Prefer KMOVkk_EVEX over KMOVkk when EGPR is supported (PR #74048)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 1 00:17:36 PST 2023
https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/74048
>From cadd0baa402b727718d42ae6942d1f009e11dd3d Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Fri, 1 Dec 2023 15:54:20 +0800
Subject: [PATCH 1/2] [X86][CodeGen] Prefer KMOVkk_EVEX over KMOVkk when EGPR
is supported
In the memory fold table, we have
```
{X86::KMOVDkk, X86::KMOVDkm, 0},
{X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0}
```
where KMOVDkm_EVEX can use EGPRs as base and index registers, while
KMOVDkm can't. Hence, although KMOVkk does not have any GPR operands,
we prefer to use KMOVDkk_EVEX so that memory folding remains possible.
It will be compressed to KMOVDkk in the EVEX2VEX pass if memory folding
does not happen.
---
llvm/lib/Target/X86/X86DomainReassignment.cpp | 23 +++++++++++--------
llvm/lib/Target/X86/X86InstrInfo.cpp | 4 +++-
llvm/test/CodeGen/X86/apx/kmov-kk.ll | 14 +++++++++++
3 files changed, 31 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/kmov-kk.ll
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index be7e8db95b98ed8..bdd86e48fa54388 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -625,8 +625,10 @@ void X86DomainReassignment::initConverters() {
createReplacerDstCOPY(X86::MOVZX64rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
- createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
- createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX32rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+ createReplacerDstCOPY(X86::MOVZX64rr16,
+ HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
if (STI->hasDQI()) {
createReplacerDstCOPY(X86::MOVZX16rm8,
@@ -636,9 +638,12 @@ void X86DomainReassignment::initConverters() {
createReplacerDstCOPY(X86::MOVZX64rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
- createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
- createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX16rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX32rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+ createReplacerDstCOPY(X86::MOVZX64rr8,
+ HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
}
auto createReplacer = [&](unsigned From, unsigned To) {
@@ -647,7 +652,7 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
- createReplacer(X86::MOV16rr, X86::KMOVWkk);
+ createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -662,8 +667,8 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
- createReplacer(X86::MOV32rr, X86::KMOVDkk);
- createReplacer(X86::MOV64rr, X86::KMOVQkk);
+ createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
+ createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -703,7 +708,7 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
- createReplacer(X86::MOV8rr, X86::KMOVBkk);
+ createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacer(X86::NOT8r, X86::KNOTBrr);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 583f8ec73a0361f..ea3bf1f101c1e08 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4070,6 +4070,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasVLX = Subtarget.hasVLX();
+ bool HasEGPR = Subtarget.hasEGPR();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -4124,7 +4125,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// All KMASK RegClasses hold the same k registers, can be tested against
// anyone.
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
+ Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+ : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
diff --git a/llvm/test/CodeGen/X86/apx/kmov-kk.ll b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
new file mode 100644
index 000000000000000..c9a4c5762872f45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+
+define <16 x i32> @kmovkk(ptr %base, <16 x i32> %ind, i16 %mask) {
+; EGPR: kmovw %esi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xce]
+ %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+ %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+ %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+ %res = add <16 x i32> %gt1, %gt2
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
>From 21e4facaae1d378d5d29c68e8f456457c922c013 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Fri, 1 Dec 2023 16:17:13 +0800
Subject: [PATCH 2/2] update test
---
llvm/test/CodeGen/X86/apx/kmov-kk.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/apx/kmov-kk.ll b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
index c9a4c5762872f45..639a35f4546fc90 100644
--- a/llvm/test/CodeGen/X86/apx/kmov-kk.ll
+++ b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
define <16 x i32> @kmovkk(ptr %base, <16 x i32> %ind, i16 %mask) {
-; EGPR: kmovw %esi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xce]
+; EGPR: kmovq %k1, %k2 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0xd1]
%broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
%broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
%gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
More information about the llvm-commits
mailing list