[llvm-branch-commits] [llvm] 59188e1 - Revert "[X86] Allow EVEX compression for mask registers (#171980)"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 7 14:02:50 PST 2026
Author: Simon Pilgrim
Date: 2026-01-07T22:02:45Z
New Revision: 59188e19250b1797ae7e070de5e1e1ed0c388521
URL: https://github.com/llvm/llvm-project/commit/59188e19250b1797ae7e070de5e1e1ed0c388521
DIFF: https://github.com/llvm/llvm-project/commit/59188e19250b1797ae7e070de5e1e1ed0c388521.diff
LOG: Revert "[X86] Allow EVEX compression for mask registers (#171980)"
This reverts commit 1caf2704dd6791baa4b958d6a666ea64ec24795d.
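For reference, the reverted change had taught X86CompressEVEX to fold a sign-bit mask extraction that passes through a k-register into a single VEX instruction. A rough sketch of the pattern, reconstructed from the removed code and the test updates below (register choices are illustrative only):

    vpmovd2m  %ymm0, %k0     # EVEX: element sign bits -> mask register
    kmovw     %k0, %eax      # mask register -> GPR
  becomes
    vmovmskps %ymm0, %eax    # VEX: sign bits straight to a GPR

The revert restores the two-instruction EVEX sequence in the affected tests.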
Added:
Modified:
llvm/lib/Target/X86/X86CompressEVEX.cpp
llvm/test/CodeGen/X86/avx512-ext.ll
llvm/test/CodeGen/X86/avx512-insert-extract.ll
llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/masked_compressstore.ll
llvm/test/CodeGen/X86/masked_expandload.ll
llvm/test/CodeGen/X86/masked_gather_scatter.ll
llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
llvm/test/CodeGen/X86/masked_load.ll
llvm/test/CodeGen/X86/masked_store.ll
llvm/test/CodeGen/X86/pr77459.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index fd79772dcb7b1..59d653b84eb8a 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -16,7 +16,6 @@
// d. NF_ND (EVEX) -> NF (EVEX)
// e. NonNF (EVEX) -> NF (EVEX)
// f. SETZUCCm (EVEX) -> SETCCm (legacy)
-// g. VPMOV*2M (EVEX) + KMOV -> VMOVMSK/VPMOVMSKB (VEX)
//
// Compression a, b and c can always reduce code size, with some exceptions
// such as promoted 16-bit CRC32 which is as long as the legacy version.
@@ -42,7 +41,6 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
@@ -180,137 +178,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
return true;
}
-static bool isKMovNarrowing(unsigned VPMOVOpc, unsigned KMOVOpc) {
- unsigned VPMOVBits = 0;
- switch (VPMOVOpc) {
- case X86::VPMOVQ2MZ128kr:
- VPMOVBits = 2;
- break;
- case X86::VPMOVQ2MZ256kr:
- case X86::VPMOVD2MZ128kr:
- VPMOVBits = 4;
- break;
- case X86::VPMOVD2MZ256kr:
- VPMOVBits = 8;
- break;
- case X86::VPMOVB2MZ128kr:
- VPMOVBits = 16;
- break;
- case X86::VPMOVB2MZ256kr:
- VPMOVBits = 32;
- break;
- default:
- llvm_unreachable("Unknown VPMOV opcode");
- }
-
- unsigned KMOVSize = 0;
- switch (KMOVOpc) {
- case X86::KMOVBrk:
- KMOVSize = 8;
- break;
- case X86::KMOVWrk:
- KMOVSize = 16;
- break;
- case X86::KMOVDrk:
- KMOVSize = 32;
- break;
- default:
- llvm_unreachable("Unknown KMOV opcode");
- }
-
- return KMOVSize < VPMOVBits;
-}
-
-// Try to compress VPMOV*2M + KMOV chain patterns:
-// vpmov*2m %xmm0, %k0 -> (erase this)
-// kmov* %k0, %eax -> vmovmskp* %xmm0, %eax
-static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB,
- const X86Subtarget &ST,
- SmallVectorImpl<MachineInstr *> &ToErase) {
- const X86InstrInfo *TII = ST.getInstrInfo();
- const TargetRegisterInfo *TRI = ST.getRegisterInfo();
-
- unsigned Opc = MI.getOpcode();
- if (Opc != X86::VPMOVD2MZ128kr && Opc != X86::VPMOVD2MZ256kr &&
- Opc != X86::VPMOVQ2MZ128kr && Opc != X86::VPMOVQ2MZ256kr &&
- Opc != X86::VPMOVB2MZ128kr && Opc != X86::VPMOVB2MZ256kr)
- return false;
-
- Register MaskReg = MI.getOperand(0).getReg();
- Register SrcVecReg = MI.getOperand(1).getReg();
-
- unsigned MovMskOpc = 0;
- switch (Opc) {
- case X86::VPMOVD2MZ128kr:
- MovMskOpc = X86::VMOVMSKPSrr;
- break;
- case X86::VPMOVD2MZ256kr:
- MovMskOpc = X86::VMOVMSKPSYrr;
- break;
- case X86::VPMOVQ2MZ128kr:
- MovMskOpc = X86::VMOVMSKPDrr;
- break;
- case X86::VPMOVQ2MZ256kr:
- MovMskOpc = X86::VMOVMSKPDYrr;
- break;
- case X86::VPMOVB2MZ128kr:
- MovMskOpc = X86::VPMOVMSKBrr;
- break;
- case X86::VPMOVB2MZ256kr:
- MovMskOpc = X86::VPMOVMSKBYrr;
- break;
- default:
- llvm_unreachable("Unknown VPMOV opcode");
- }
-
- MachineInstr *KMovMI = nullptr;
-
- for (MachineInstr &CurMI : llvm::make_range(
- std::next(MachineBasicBlock::iterator(MI)), MBB.end())) {
- if (CurMI.modifiesRegister(MaskReg, TRI)) {
- if (!KMovMI)
- return false; // Mask clobbered before use
- break;
- }
-
- if (CurMI.readsRegister(MaskReg, TRI)) {
- if (KMovMI)
- return false; // Fail: Mask has MULTIPLE uses
-
- unsigned UseOpc = CurMI.getOpcode();
- bool IsKMOV = UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
- UseOpc == X86::KMOVDrk;
- // Only allow non-narrowing KMOV uses of the mask.
- if (IsKMOV && CurMI.getOperand(1).getReg() == MaskReg &&
- !isKMovNarrowing(Opc, UseOpc)) {
- KMovMI = &CurMI;
- // continue scanning to ensure
- // there are no *other* uses of the mask later in the block.
- } else {
- return false;
- }
- }
-
- if (!KMovMI && CurMI.modifiesRegister(SrcVecReg, TRI)) {
- return false; // SrcVecReg modified before it could be used by MOVMSK
- }
- }
-
- if (!KMovMI)
- return false;
-
- // Apply the transformation
- KMovMI->setDesc(TII->get(MovMskOpc));
- KMovMI->getOperand(1).setReg(SrcVecReg);
- KMovMI->setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
-
- ToErase.push_back(&MI);
- return true;
-}
-
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
- const X86Subtarget &ST,
- SmallVectorImpl<MachineInstr *> &ToErase) {
+ const X86Subtarget &ST) {
uint64_t TSFlags = MI.getDesc().TSFlags;
// Check for EVEX instructions only.
@@ -321,10 +190,6 @@ static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
return false;
- // Specialized VPMOVD2M + KMOV -> MOVMSK fold first.
- if (tryCompressVPMOVPattern(MI, MBB, ST, ToErase))
- return true;
-
auto IsRedundantNewDataDest = [&](unsigned &Opc) {
// $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
// ->
@@ -485,15 +350,9 @@ static bool runOnMF(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- SmallVector<MachineInstr *, 4> ToErase;
-
- for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
- Changed |= CompressEVEXImpl(MI, MBB, ST, ToErase);
- }
-
- for (MachineInstr *MI : ToErase) {
- MI->eraseFromParent();
- }
+ // Traverse the basic block.
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ Changed |= CompressEVEXImpl(MI, MBB, ST);
}
LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
return Changed;
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 2617e2d12adfd..1a712ffac5b7e 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1745,7 +1745,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
-; AVX512DQNOBW-NEXT: vmovmskps %ymm0, %eax
+; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
+; AVX512DQNOBW-NEXT: kmovw %k0, %eax
; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQNOBW-NEXT: vzeroupper
; AVX512DQNOBW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index f8b0c3465f3db..e183da1386d5b 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1669,7 +1669,8 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) n
; SKX-NEXT: vpmovm2b %k0, %ymm0
; SKX-NEXT: vpbroadcastb %eax, %ymm0 {%k1}
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
-; SKX-NEXT: vpmovmskb %ymm0, %eax
+; SKX-NEXT: vpmovb2m %ymm0, %k0
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%t1 = icmp ugt <32 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index 6f3be88d7cd0c..f31dafcd68626 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -2751,7 +2751,8 @@ declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
define i8 @test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskps %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x50,0xc0]
+; CHECK-NEXT: vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
@@ -2776,7 +2777,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
define i8 @test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskpd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x50,0xc0]
+; CHECK-NEXT: vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
@@ -2788,7 +2790,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
define i8 @test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskpd %ymm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x50,0xc0]
+; CHECK-NEXT: vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index 5296c9d0f0777..3187bf6448690 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -3444,7 +3444,8 @@ define void @compressstore_v8i16_v8i16(ptr %base, <8 x i16> %V, <8 x i16> %trigg
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
-; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: jne LBB11_1
; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_expandload.ll b/llvm/test/CodeGen/X86/masked_expandload.ll
index ce8a34db498df..e81a983c07018 100644
--- a/llvm/test/CodeGen/X86/masked_expandload.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload.ll
@@ -3047,7 +3047,8 @@ define <8 x i16> @expandload_v8i16_v8i16(ptr %base, <8 x i16> %src0, <8 x i16> %
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
-; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: jne LBB11_1
; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index cf49ac1e4886b..58adbb767ed87 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -966,9 +966,10 @@ define <2 x double> @test17(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x doub
; X86-SKX-LABEL: test17:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB16_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1254,7 +1255,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
; X64-SKX-LABEL: test20:
; X64-SKX: # %bb.0:
; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X64-SKX-NEXT: kmovw %k0, %eax
; X64-SKX-NEXT: testb $1, %al
; X64-SKX-NEXT: jne .LBB19_1
; X64-SKX-NEXT: # %bb.2: # %else
@@ -1275,7 +1277,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
; X86-SKX-LABEL: test20:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB19_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1349,7 +1352,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
; X64-SKX-LABEL: test21:
; X64-SKX: # %bb.0:
; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X64-SKX-NEXT: kmovw %k0, %eax
; X64-SKX-NEXT: testb $1, %al
; X64-SKX-NEXT: jne .LBB20_1
; X64-SKX-NEXT: # %bb.2: # %else
@@ -1370,7 +1374,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
; X86-SKX-LABEL: test21:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB20_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1489,9 +1494,10 @@ define <2 x float> @test22(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float
; X86-SKX-LABEL: test22:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB21_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1611,10 +1617,11 @@ define <2 x float> @test22a(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x floa
; X86-SKX-LABEL: test22a:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB22_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1734,9 +1741,10 @@ define <2 x i32> @test23(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %s
; X86-SKX-LABEL: test23:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB23_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -1852,10 +1860,11 @@ define <2 x i32> @test23b(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %
; X86-SKX-LABEL: test23b:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB24_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -2025,9 +2034,10 @@ define <2 x i64> @test25(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %s
; X86-SKX-LABEL: test25:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB26_1
; X86-SKX-NEXT: # %bb.2: # %else
@@ -3752,9 +3762,10 @@ define void @test_scatter_2i32_index(<2 x double> %a1, ptr %base, <2 x i32> %ind
; X86-SKX-LABEL: test_scatter_2i32_index:
; X86-SKX: # %bb.0:
; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
; X86-SKX-NEXT: vpslld $3, %xmm1, %xmm1
; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm1, %xmm1
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
; X86-SKX-NEXT: testb $1, %al
; X86-SKX-NEXT: jne .LBB52_1
; X86-SKX-NEXT: # %bb.2: # %else
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
index 5b5280601ea71..aad1b44344850 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -164,7 +164,8 @@ define <2 x i32> @test_gather_v2i32_data(<2 x ptr> %ptr, <2 x i1> %mask, <2 x i3
; WIDEN_SKX-LABEL: test_gather_v2i32_data:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; WIDEN_SKX-NEXT: vmovmskpd %xmm1, %eax
+; WIDEN_SKX-NEXT: vpmovq2m %xmm1, %k0
+; WIDEN_SKX-NEXT: kmovw %k0, %eax
; WIDEN_SKX-NEXT: testb $1, %al
; WIDEN_SKX-NEXT: jne .LBB2_1
; WIDEN_SKX-NEXT: # %bb.2: # %else
@@ -225,7 +226,8 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask
; WIDEN_SKX-LABEL: test_scatter_v2i32_data:
; WIDEN_SKX: # %bb.0:
; WIDEN_SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; WIDEN_SKX-NEXT: vmovmskpd %xmm2, %eax
+; WIDEN_SKX-NEXT: vpmovq2m %xmm2, %k0
+; WIDEN_SKX-NEXT: kmovw %k0, %eax
; WIDEN_SKX-NEXT: testb $1, %al
; WIDEN_SKX-NEXT: jne .LBB3_1
; WIDEN_SKX-NEXT: # %bb.2: # %else
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index fa8f34cea4638..8c4bab99a5b7b 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -3008,7 +3008,8 @@ define <8 x i16> @load_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %dst
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX512VLDQ-NEXT: vmovmskps %ymm0, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: jne LBB21_1
; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index fbecfcb45f8e7..c7320275091c6 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -1829,7 +1829,8 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) no
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX512VLDQ-NEXT: vmovmskps %ymm0, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: jne LBB13_1
; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/pr77459.ll b/llvm/test/CodeGen/X86/pr77459.ll
index b03907d6c871f..9c072e6f5e3fc 100644
--- a/llvm/test/CodeGen/X86/pr77459.ll
+++ b/llvm/test/CodeGen/X86/pr77459.ll
@@ -100,7 +100,8 @@ define i8 @reverse_cmp_v8i1(<8 x i16> %a0, <8 x i16> %a1) {
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
+; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -226,7 +227,8 @@ define i32 @reverse_cmp_v32i1(<32 x i8> %a0, <32 x i8> %a1) {
; AVX512-V4-NEXT: vpmovm2b %k0, %ymm0
; AVX512-V4-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16]
; AVX512-V4-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512-V4-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-V4-NEXT: vpmovb2m %ymm0, %k0
+; AVX512-V4-NEXT: kmovd %k0, %eax
; AVX512-V4-NEXT: vzeroupper
; AVX512-V4-NEXT: retq
;
@@ -236,7 +238,8 @@ define i32 @reverse_cmp_v32i1(<32 x i8> %a0, <32 x i8> %a1) {
; AVX512-VBMI-NEXT: vpmovm2b %k0, %ymm0
; AVX512-VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512-VBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
-; AVX512-VBMI-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-VBMI-NEXT: vpmovb2m %ymm0, %k0
+; AVX512-VBMI-NEXT: kmovd %k0, %eax
; AVX512-VBMI-NEXT: vzeroupper
; AVX512-VBMI-NEXT: retq
%cmp = icmp eq <32 x i8> %a0, %a1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 116dcdc8c5907..f434fc8c6cad8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1240,7 +1240,8 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
; AVX512VL-LABEL: icmp0_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovmskb %xmm0, %eax
+; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
@@ -1906,7 +1907,8 @@ define i8 @icmp1_v8i1(<8 x i8>) nounwind {
; AVX512VL-LABEL: icmp1_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovmskb %xmm0, %eax
+; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: cmpb $-1, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index 9645f7c524cb4..2b89590a0bb41 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -573,7 +573,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm2 = [8,2,10,3,3,2,2,3]
; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -614,7 +615,8 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -659,7 +661,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -700,7 +703,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -742,7 +746,8 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
@@ -791,7 +796,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm1 = [9,1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; VL_BW_DQ-NEXT: vpermt2d %ymm0, %ymm1, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq