[llvm] [X86] Allow EVEX compression for mask registers (PR #171980)

Sun Dec 28 08:13:08 PST 2025

================
@@ -175,8 +177,88 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
   return true;
 }
 
+// Try to compress VPMOV*2M + KMOV chain patterns:
+//   vpmov*2m %xmm0, %k0     ->  (erase this)
+//   kmov* %k0, %eax         ->  vmovmskp* %xmm0, %eax
+static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB,
+                                    const X86Subtarget &ST,
+                                    SmallVectorImpl<MachineInstr *> &ToErase) {
+  const X86InstrInfo *TII = ST.getInstrInfo();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+  unsigned Opc = MI.getOpcode();
+  if (Opc != X86::VPMOVD2MZ128kr && Opc != X86::VPMOVD2MZ256kr &&
+      Opc != X86::VPMOVQ2MZ128kr && Opc != X86::VPMOVQ2MZ256kr)
+    return false;
+
+  Register MaskReg = MI.getOperand(0).getReg();
+  Register SrcVecReg = MI.getOperand(1).getReg();
+
+  unsigned MovMskOpc = 0;
+  switch (Opc) {
+  case X86::VPMOVD2MZ128kr:
+    MovMskOpc = X86::VMOVMSKPSrr;
+    break;
+  case X86::VPMOVD2MZ256kr:
+    MovMskOpc = X86::VMOVMSKPSYrr;
+    break;
+  case X86::VPMOVQ2MZ128kr:
+    MovMskOpc = X86::VMOVMSKPDrr;
+    break;
+  case X86::VPMOVQ2MZ256kr:
+    MovMskOpc = X86::VMOVMSKPDYrr;
+    break;
+  default:
+    llvm_unreachable("Unknown VPMOV opcode");
+  }
+
+  MachineInstr *KMovMI = nullptr;
+
+  for (MachineInstr &CurMI : llvm::make_range(
+           std::next(MachineBasicBlock::iterator(MI)), MBB.end())) {
+    if (CurMI.modifiesRegister(MaskReg, TRI)) {
+      if (!KMovMI)
+        return false; // Mask clobbered before use
+      break;
+    }
+
+    if (CurMI.readsRegister(MaskReg, TRI)) {
+      if (KMovMI)
+        return false; // Fail: Mask has MULTIPLE uses
+
+      unsigned UseOpc = CurMI.getOpcode();
+      bool IsKMOV = (UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
+                     UseOpc == X86::KMOVDrk || UseOpc == X86::KMOVQrk);
----------------
RKSimon wrote:

Yes, thanks for checking. If we just keep to VPMOVD2*/VPMOVQ2* instructions then we're always OK, as we have at most 8 active bits - but if we want to support VPMOVB2* (VPMOVMSKB) as well, then we will need something like the above constraint to handle 16/32 active bits.

It would be good to handle VPMOVB2* as well, but if you don't want to in this patch a TODO comment (including the KMOV issue) would be OK for now.

https://github.com/llvm/llvm-project/pull/171980