[llvm] [X86][MC] Compress APX Promoted instrs from evex to legacy encoding to save code size. (PR #77065)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 5 01:42:34 PST 2024


https://github.com/XinWang10 created https://github.com/llvm/llvm-project/pull/77065

APX promotes some legacy instructions to EVEX encoding so that they can use r16-r31. However, if a promoted instruction does not use an EGPR, it has the same functionality as its legacy version but a larger code size, so we can compress it back to the legacy encoding to save code size.
This optimization is integrated into the existing EvexToVexInstPass, which is renamed to EvexToNonEvexInstPass.

>From a3113dabc8fe0a37ffef11fc507597e538be35e0 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Thu, 4 Jan 2024 01:28:52 -0800
Subject: [PATCH 1/2] basic support

---
 llvm/lib/Target/X86/CMakeLists.txt            |   4 +-
 llvm/lib/Target/X86/X86.h                     |   6 +-
 ...{X86EvexToVex.cpp => X86EvexToNonEvex.cpp} |  96 ++++++----
 llvm/lib/Target/X86/X86InstrInfo.h            |   4 +
 llvm/lib/Target/X86/X86MCInstLower.cpp        |   4 +
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   4 +-
 llvm/test/CodeGen/X86/O0-pipeline.ll          |   2 +-
 .../test/CodeGen/X86/evex-to-vex-compress.mir |   2 +-
 llvm/test/CodeGen/X86/opt-pipeline.ll         |   2 +-
 llvm/utils/TableGen/CMakeLists.txt            |   2 +-
 ...r.cpp => X86EVEX2NonEVEXTablesEmitter.cpp} | 173 ++++++++++++++----
 .../TableGen/X86ManualEVEXCompressTables.def  |  22 +++
 12 files changed, 247 insertions(+), 74 deletions(-)
 rename llvm/lib/Target/X86/{X86EvexToVex.cpp => X86EvexToNonEvex.cpp} (73%)
 rename llvm/utils/TableGen/{X86EVEX2VEXTablesEmitter.cpp => X86EVEX2NonEVEXTablesEmitter.cpp} (53%)
 create mode 100644 llvm/utils/TableGen/X86ManualEVEXCompressTables.def

diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 0b7a98ad6341dd..5cd2a8e40d0d58 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -8,7 +8,7 @@ tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+tablegen(LLVM X86GenEVEX2NonEVEXTables.inc -gen-x86-EVEX2NonEVEX-tables)
 tablegen(LLVM X86GenExegesis.inc -gen-exegesis)
 tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
@@ -61,7 +61,7 @@ set(sources
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
   X86InstrInfo.cpp
-  X86EvexToVex.cpp
+  X86EvexToNonEvex.cpp
   X86LoadValueInjectionLoadHardening.cpp
   X86LoadValueInjectionRetHardening.cpp
   X86MCInstLower.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 485afbc1dfbc24..9f2c641cce3aec 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts();
 /// to another, when profitable.
 FunctionPass *createX86DomainReassignmentPass();
 
-/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX
+/// This pass replaces EVEX-encoded AVX-512 instructions with non-EVEX
 /// encoding when possible in order to reduce code size.
-FunctionPass *createX86EvexToVexInsts();
+FunctionPass *createX86EvexToNonEvexInsts();
 
 /// This pass creates the thunks for the retpoline feature.
 FunctionPass *createX86IndirectThunksPass();
@@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
 FunctionPass *createX86ArgumentStackSlotPass();
 
-void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeEvexToNonEvexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
similarity index 73%
rename from llvm/lib/Target/X86/X86EvexToVex.cpp
rename to llvm/lib/Target/X86/X86EvexToNonEvex.cpp
index c425c37b418681..7eebb51e1927ae 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
@@ -1,5 +1,6 @@
-//===- X86EvexToVex.cpp ---------------------------------------------------===//
-// Compress EVEX instructions to VEX encoding when possible to reduce code size
+//===- X86EvexToNonEvex.cpp -----------------------------------------------===//
+// Compress EVEX instructions to Non-EVEX encoding when possible to reduce code
+// size.
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,7 +17,11 @@
 /// accessed by instruction is less than 512 bits and when it does not use
 //  the xmm or the mask registers or xmm/ymm registers with indexes higher
 //  than 15.
-/// The pass applies code reduction on the generated code for AVX-512 instrs.
+//  APX promoted instrs use evex encoding which could let them use r16-r31, if
+//  they don't use egpr, we could compress them back to legacy encoding to save
+//  code size.
+/// The pass applies code reduction on the generated code for AVX-512 instrs and
+/// APX promoted instrs.
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,34 +43,35 @@
 
 using namespace llvm;
 
-// Including the generated EVEX2VEX tables.
-struct X86EvexToVexCompressTableEntry {
+// Including the generated EVEX2NonEVEX tables.
+struct X86EvexToNonEvexCompressTableEntry {
   uint16_t EvexOpc;
-  uint16_t VexOpc;
+  uint16_t NonEvexOpc;
 
-  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
+  bool operator<(const X86EvexToNonEvexCompressTableEntry &RHS) const {
     return EvexOpc < RHS.EvexOpc;
   }
 
-  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
+  friend bool operator<(const X86EvexToNonEvexCompressTableEntry &TE,
                         unsigned Opc) {
     return TE.EvexOpc < Opc;
   }
 };
-#include "X86GenEVEX2VEXTables.inc"
+#include "X86GenEVEX2NonEVEXTables.inc"
 
-#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
-#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
+#define EVEX2NONEVEX_DESC                                                      \
+  "Compressing EVEX instrs to Non-EVEX encoding when possible"
+#define EVEX2NONEVEX_NAME "x86-evex-to-non-evex-compress"
 
-#define DEBUG_TYPE EVEX2VEX_NAME
+#define DEBUG_TYPE EVEX2NONEVEX_NAME
 
 namespace {
 
-class EvexToVexInstPass : public MachineFunctionPass {
+class EvexToNonEvexInstPass : public MachineFunctionPass {
 public:
   static char ID;
-  EvexToVexInstPass() : MachineFunctionPass(ID) {}
-  StringRef getPassName() const override { return EVEX2VEX_DESC; }
+  EvexToNonEvexInstPass() : MachineFunctionPass(ID) {}
+  StringRef getPassName() const override { return EVEX2NONEVEX_DESC; }
 
   /// Loop over all of the basic blocks, replacing EVEX instructions
   /// by equivalent VEX instructions when possible for reducing code size.
@@ -80,7 +86,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
 
 } // end anonymous namespace
 
-char EvexToVexInstPass::ID = 0;
+char EvexToNonEvexInstPass::ID = 0;
 
 static bool usesExtendedRegister(const MachineInstr &MI) {
   auto isHiRegIdx = [](unsigned Reg) {
@@ -151,15 +157,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
 }
 
 // Do any custom cleanup needed to finalize the conversion.
-static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
-  (void)VexOpc;
+static bool performCustomAdjustments(MachineInstr &MI, unsigned NonEvexOpc) {
+  (void)NonEvexOpc;
   unsigned Opc = MI.getOpcode();
   switch (Opc) {
   case X86::VALIGNDZ128rri:
   case X86::VALIGNDZ128rmi:
   case X86::VALIGNQZ128rri:
   case X86::VALIGNQZ128rmi: {
-    assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
+    assert((NonEvexOpc == X86::VPALIGNRrri || NonEvexOpc == X86::VPALIGNRrmi) &&
            "Unexpected new opcode!");
     unsigned Scale =
         (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
@@ -175,8 +181,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
   case X86::VSHUFI32X4Z256rri:
   case X86::VSHUFI64X2Z256rmi:
   case X86::VSHUFI64X2Z256rri: {
-    assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
-            VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
+    assert((NonEvexOpc == X86::VPERM2F128rr || NonEvexOpc == X86::VPERM2I128rr ||
+            NonEvexOpc == X86::VPERM2F128rm || NonEvexOpc == X86::VPERM2I128rm) &&
            "Unexpected new opcode!");
     MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     int64_t ImmVal = Imm.getImm();
@@ -214,6 +220,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
 static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasAVX512())
+    return false;
   // VEX format.
   // # of bytes: 0,2,3  1      1      0,1   0,1,2,4  0,1
   //  [Prefixes] [VEX]  OPCODE ModR/M [SIB] [DISP]  [IMM]
@@ -239,7 +247,7 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
 
   // Use the VEX.L bit to select the 128 or 256-bit table.
-  ArrayRef<X86EvexToVexCompressTableEntry> Table =
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table =
       (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
                                     : ArrayRef(X86EvexToVex128CompressTable);
 
@@ -252,15 +260,36 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
   if (!checkVEXInstPredicate(EvexOpc, ST))
     return false;
-  if (!performCustomAdjustments(MI, I->VexOpc))
+  if (!performCustomAdjustments(MI, I->NonEvexOpc))
     return false;
 
-  MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+  MI.setDesc(ST.getInstrInfo()->get(I->NonEvexOpc));
   MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
   return true;
 }
 
-bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
+// For apx promoted instructions, if they don't use egpr, we could try to use
+// legacy encoding to save code size.
+static bool CompressEVEX2LegacyImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasEGPR())
+    return false;
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table = X86EvexToLegacyCompressTable;
+  unsigned EvexOpc = MI.getOpcode();
+  const auto *I = llvm::lower_bound(Table, EvexOpc);
+  if (I == Table.end() || I->EvexOpc != EvexOpc)
+    return false;
+  unsigned NewOpc = I->NonEvexOpc;
+  for (unsigned Index = 0, Size = MI.getNumOperands(); Index < Size; Index++) {
+    const MachineOperand &Op = MI.getOperand(Index);
+    if (Op.isReg() && X86II::isApxExtendedReg(Op.getReg()))
+      return false;
+  }
+  MI.setDesc(ST.getInstrInfo()->get(NewOpc));
+  MI.setAsmPrinterFlag(X86::AC_EVEX_2_LEGACY);
+  return true;
+}
+
+bool EvexToNonEvexInstPass::runOnMachineFunction(MachineFunction &MF) {
 #ifndef NDEBUG
   // Make sure the tables are sorted.
   static std::atomic<bool> TableChecked(false);
@@ -269,28 +298,33 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
            "X86EvexToVex128CompressTable is not sorted!");
     assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
            "X86EvexToVex256CompressTable is not sorted!");
+    assert(llvm::is_sorted(X86EvexToLegacyCompressTable) &&
+           "X86EvexToLegacyCompressTable is not sorted!");
     TableChecked.store(true, std::memory_order_relaxed);
   }
 #endif
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-  if (!ST.hasAVX512())
+  if (!ST.hasAVX512() && !ST.hasEGPR())
     return false;
 
   bool Changed = false;
 
   /// Go over all basic blocks in function and replace
-  /// EVEX encoded instrs by VEX encoding when possible.
+  /// EVEX encoded instrs by VEX/Legacy encoding when possible.
   for (MachineBasicBlock &MBB : MF) {
     // Traverse the basic block.
-    for (MachineInstr &MI : MBB)
+    for (MachineInstr &MI : MBB) {
       Changed |= CompressEvexToVexImpl(MI, ST);
+      Changed |= CompressEVEX2LegacyImpl(MI, ST);
+    }
   }
 
   return Changed;
 }
 
-INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+INITIALIZE_PASS(EvexToNonEvexInstPass, EVEX2NONEVEX_NAME, EVEX2NONEVEX_DESC,
+                false, false)
 
-FunctionPass *llvm::createX86EvexToVexInsts() {
-  return new EvexToVexInstPass();
+FunctionPass *llvm::createX86EvexToNonEvexInsts() {
+  return new EvexToNonEvexInstPass();
 }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index eac8d79eb8a32a..685c033e10f49a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -31,6 +31,10 @@ namespace X86 {
 enum AsmComments {
   // For instr that was compressed from EVEX to VEX.
   AC_EVEX_2_VEX = MachineInstr::TAsmComments
+  , // For instrs that was compressed from ND to non-ND.
+  AC_ND_2_NONND = AC_EVEX_2_VEX << 1
+  , // For instrs that was compressed from EVEX to Legacy.
+  AC_EVEX_2_LEGACY = AC_ND_2_NONND << 1
 };
 
 /// Return a pair of condition code for the given predicate and whether
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1a67f61e76640..f854b109fc80e3 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2060,6 +2060,10 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   if (TM.Options.MCOptions.ShowMCEncoding) {
     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+    else if (MI->getAsmPrinterFlags() & X86::AC_ND_2_NONND)
+      OutStreamer->AddComment("ND TO non-ND Compression ", false);
+    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
+      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
   }
 
   // Add comments for values loaded from constant pool.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 5668b514d6dec0..05f1dbd63f4f1f 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializeGlobalISel(PR);
   initializeWinEHStatePassPass(PR);
   initializeFixupBWInstPassPass(PR);
-  initializeEvexToVexInstPassPass(PR);
+  initializeEvexToNonEvexInstPassPass(PR);
   initializeFixupLEAPassPass(PR);
   initializeFPSPass(PR);
   initializeX86FixupSetCCPassPass(PR);
@@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() {
     addPass(createX86FixupInstTuning());
     addPass(createX86FixupVectorConstants());
   }
-  addPass(createX86EvexToVexInsts());
+  addPass(createX86EvexToNonEvexInsts());
   addPass(createX86DiscriminateMemOpsPass());
   addPass(createX86InsertPrefetchPass());
   addPass(createX86InsertX87waitPass());
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 402645ed1e2e5d..feec8d3db27e64 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       X86 Indirect Branch Tracking
 ; CHECK-NEXT:       X86 vzeroupper inserter
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possibl
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index 06d3c1532c3eaa..928ac700ee009d 100644
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-non-evex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
 # This test verifies VEX encoding for AVX-512 instructions that use registers of low indexes and
 # do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode
 
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index fb8d2335b34106..a44e04e8ee41ed 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -205,7 +205,7 @@
 ; CHECK-NEXT:       X86 LEA Fixup
 ; CHECK-NEXT:       X86 Fixup Inst Tuning
 ; CHECK-NEXT:       X86 Fixup Vector Constants
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 071ea3bc07054b..32332c121604e7 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -81,7 +81,7 @@ add_tablegen(llvm-tblgen LLVM
   Types.cpp
   VarLenCodeEmitterGen.cpp
   X86DisassemblerTables.cpp
-  X86EVEX2VEXTablesEmitter.cpp
+  X86EVEX2NonEVEXTablesEmitter.cpp
   X86FoldTablesEmitter.cpp
   X86MnemonicTables.cpp
   X86ModRMFilters.cpp
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
similarity index 53%
rename from llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
rename to llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index c80d9a199fa3c1..701ae20d908ab5 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -1,4 +1,4 @@
-//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===//
+//=- utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp - X86 backend-*- C++ -*-//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,12 @@ using namespace X86Disassembler;
 
 namespace {
 
-class X86EVEX2VEXTablesEmitter {
+const std::map<StringRef, StringRef> ManualMap = {
+#define EVEXENTRY(EVEX, NonEVEXInstStr) {#EVEX, #NonEVEXInstStr},
+#include "X86ManualEVEXCompressTables.def"
+};
+
+class X86EVEX2NonEVEXTablesEmitter {
   RecordKeeper &Records;
   CodeGenTarget Target;
 
@@ -40,28 +45,55 @@ class X86EVEX2VEXTablesEmitter {
   std::vector<Entry> EVEX2VEX128;
   std::vector<Entry> EVEX2VEX256;
 
+  // Hold all possibly compressed APX instructions, including only ND and EGPR
+  // instruction so far
+  std::vector<const CodeGenInstruction *> APXInsts;
+  // Hold all X86 instructions. Divided into groups with same opcodes
+  // to make the search more efficient
+  std::map<uint64_t, std::vector<const CodeGenInstruction *>> LegacyInsts;
+  // Represent EVEX to Legacy compress tables.
+  std::vector<Entry> EVEX2LegacyTable;
+
 public:
-  X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+  X86EVEX2NonEVEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Output X86 EVEX2VEX tables.
   void run(raw_ostream &OS);
 
 private:
   // Prints the given table as a C++ array of type
-  // X86EvexToVexCompressTableEntry
+  // X86EvexToNonEvexCompressTableEntry
   void printTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  // X86EVEXToLegacyCompressTableEntry
+  void printEVEX2LegacyTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  void addManualEntry(const CodeGenInstruction *EVEXInstr,
+                      const CodeGenInstruction *LegacyInstr,
+                      const char *TableName);
 };
 
-void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
+void X86EVEX2NonEVEXTablesEmitter::printTable(const std::vector<Entry> &Table,
                                           raw_ostream &OS) {
-  StringRef Size = (Table == EVEX2VEX128) ? "128" : "256";
 
-  OS << "// X86 EVEX encoded instructions that have a VEX " << Size
-     << " encoding\n"
-     << "// (table format: <EVEX opcode, VEX-" << Size << " opcode>).\n"
-     << "static const X86EvexToVexCompressTableEntry X86EvexToVex" << Size
+  StringRef TargetEnc;
+  StringRef TableName;
+  StringRef Size;
+  if (Table == EVEX2LegacyTable){
+    TargetEnc = "Legacy";
+    TableName = "X86EvexToLegacy";
+  }
+  else {
+    TargetEnc = "VEX";
+    TableName = "X86EvexToVex";
+    Size = (Table == EVEX2VEX128) ? "128" : "256";
+  }
+
+  OS << "// X86 EVEX encoded instructions that have a " << TargetEnc << " "
+     << Size << " encoding\n"
+     << "// (table format: <EVEX opcode, " << TargetEnc << Size
+     << " opcode>).\n"
+     << "static const X86EvexToNonEvexCompressTableEntry " << TableName << Size
      << "CompressTable[] = {\n"
-     << "  // EVEX scalar with corresponding VEX.\n";
+     << "  // EVEX scalar with corresponding " << TargetEnc << ".\n";
 
   // Print all entries added to the table
   for (const auto &Pair : Table) {
@@ -85,6 +117,31 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
   return Value;
 }
 
+static bool checkMatchable(const CodeGenInstruction *EVEXInst,
+                           const CodeGenInstruction *NonEVEXInst) {
+  for (unsigned I = 0, E = NonEVEXInst->Operands.size(); I < E; I++) {
+    Record *OpRec1 = EVEXInst->Operands[I].Rec;
+    Record *OpRec2 = NonEVEXInst->Operands[I].Rec;
+
+    if (OpRec1 == OpRec2)
+      continue;
+
+    if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
+      if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
+        return false;
+    } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
+      if (getMemOperandSize(OpRec1) != getMemOperandSize(OpRec2))
+        return false;
+    } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
+      if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
+        return false;
+      }
+    } else
+      return false;
+  }
+  return true;
+}
+
 // Function object - Operator() returns true if the given VEX instruction
 // matches the EVEX instruction of this object.
 class IsMatch {
@@ -121,31 +178,47 @@ class IsMatch {
     // Also for instructions that their EVEX version was upgraded to work with
     // k-registers. For example VPCMPEQBrm (xmm output register) and
     // VPCMPEQBZ128rm (k register output register).
-    for (unsigned i = 0, e = EVEXInst->Operands.size(); i < e; i++) {
-      Record *OpRec1 = EVEXInst->Operands[i].Rec;
-      Record *OpRec2 = VEXInst->Operands[i].Rec;
+    return checkMatchable(EVEXInst, VEXInst);
+  }
+};
 
-      if (OpRec1 == OpRec2)
-        continue;
+class IsMatchAPX {
+  const CodeGenInstruction *EVEXInst;
 
-      if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
-        if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
-          return false;
-      } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
-        return false;
-      } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
-        if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
-          return false;
-        }
-      } else
-        return false;
-    }
+public:
+  IsMatchAPX(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {}
+
+  bool operator()(const CodeGenInstruction *LegacyInst) {
+    Record *RecEVEX = EVEXInst->TheDef;
+    Record *RecLegacy = LegacyInst->TheDef;
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize16" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PD")
+      return false;
+
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize32" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PS")
+      return false;
+
+    if (RecEVEX->getValueAsBit("hasREX_W") !=
+        RecLegacy->getValueAsBit("hasREX_W"))
+      return false;
+
+    if (RecLegacy->getValueAsDef("AdSize")->getName() !=
+        RecEVEX->getValueAsDef("AdSize")->getName())
+      return false;
+
+    if (RecLegacy->getValueAsDef("Form") != RecEVEX->getValueAsDef("Form"))
+      return false;
+
+    if (RecLegacy->getValueAsBit("isCodeGenOnly") !=
+        RecEVEX->getValueAsBit("isCodeGenOnly"))
+      return false;
 
-    return true;
+    return checkMatchable(EVEXInst, LegacyInst);
   }
 };
 
-void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
+void X86EVEX2NonEVEXTablesEmitter::run(raw_ostream &OS) {
   emitSourceFileHeader("X86 EVEX2VEX tables", OS);
 
   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
@@ -169,6 +242,17 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
     else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B &&
              !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible"))
       EVEXInsts.push_back(Inst);
+
+    if (RI.Encoding == X86Local::EVEX && RI.OpMap == X86Local::T_MAP4 &&
+        !RI.HasEVEX_NF &&
+        !getValueFromBitsInit(
+            Def->getValueAsBitsInit("explicitOpPrefixBits"))) {
+      APXInsts.push_back(Inst);
+    } else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncNormal") {
+      uint64_t Opcode =
+          getValueFromBitsInit(Inst->TheDef->getValueAsBitsInit("Opcode"));
+      LegacyInsts[Opcode].push_back(Inst);
+    }
   }
 
   for (const CodeGenInstruction *EVEXInst : EVEXInsts) {
@@ -203,8 +287,33 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
   // Print both tables
   printTable(EVEX2VEX128, OS);
   printTable(EVEX2VEX256, OS);
+
+  for (const CodeGenInstruction *EVEXInst : APXInsts) {
+    // REV instrs should not appear before encoding optimization.
+    if (EVEXInst->TheDef->getName().ends_with("_REV"))
+      continue;
+    const CodeGenInstruction *LegacyInst = nullptr;
+    if (ManualMap.count(EVEXInst->TheDef->getName())) {
+      auto NonEVEXInstStr =
+          ManualMap.at(StringRef(EVEXInst->TheDef->getName()));
+      Record *LegacyRec = Records.getDef(NonEVEXInstStr);
+      LegacyInst = &(Target.getInstruction(LegacyRec));
+    } else {
+      uint64_t Opcode =
+          getValueFromBitsInit(EVEXInst->TheDef->getValueAsBitsInit("Opcode"));
+      auto Match = llvm::find_if(LegacyInsts[Opcode], IsMatchAPX(EVEXInst));
+      if (Match != LegacyInsts[Opcode].end())
+        LegacyInst = *Match;
+    }
+    if (LegacyInst) {
+      if (!EVEXInst->TheDef->getValueAsBit("hasEVEX_B"))
+        EVEX2LegacyTable.push_back(std::make_pair(EVEXInst, LegacyInst));
+    }
+  }
+  printTable(EVEX2LegacyTable, OS);
 }
 } // namespace
 
-static TableGen::Emitter::OptClass<X86EVEX2VEXTablesEmitter>
-    X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables");
+static TableGen::Emitter::OptClass<X86EVEX2NonEVEXTablesEmitter>
+    X("gen-x86-EVEX2NonEVEX-tables",
+      "Generate X86 EVEX to NonEVEX compress tables");
diff --git a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
new file mode 100644
index 00000000000000..a7ca339e006521
--- /dev/null
+++ b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
@@ -0,0 +1,22 @@
+#ifndef EVEXENTRY
+#define EVEXENTRY(EVEX, LEGACY)
+#endif
+// The following entries are added manually b/c:
+//  1.  The prefix is used specially, like RAOINT, prefix could be used to
+//      identify instrs.
+//  2.  Opcode could change when promote to map4, like sha instrs.
+EVEXENTRY(SHA1MSG1rm_EVEX, SHA1MSG1rm)
+EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
+EVEXENTRY(SHA1MSG2rm_EVEX, SHA1MSG2rm)
+EVEXENTRY(SHA1MSG2rr_EVEX, SHA1MSG2rr)
+EVEXENTRY(SHA1NEXTErm_EVEX, SHA1NEXTErm)
+EVEXENTRY(SHA1NEXTErr_EVEX, SHA1NEXTErr)
+EVEXENTRY(SHA1RNDS4rmi_EVEX, SHA1RNDS4rmi)
+EVEXENTRY(SHA1RNDS4rri_EVEX, SHA1RNDS4rri)
+EVEXENTRY(SHA256MSG1rm_EVEX, SHA256MSG1rm)
+EVEXENTRY(SHA256MSG1rr_EVEX, SHA256MSG1rr)
+EVEXENTRY(SHA256MSG2rm_EVEX, SHA256MSG2rm)
+EVEXENTRY(SHA256MSG2rr_EVEX, SHA256MSG2rr)
+EVEXENTRY(SHA256RNDS2rm_EVEX, SHA256RNDS2rm)
+EVEXENTRY(SHA256RNDS2rr_EVEX, SHA256RNDS2rr)
+#undef EVEXENTRY

>From 8d21f02c9ef42007ab487ac4f11e27ef15a65af3 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Fri, 5 Jan 2024 01:35:26 -0800
Subject: [PATCH 2/2] update tests

---
 .../X86/crc32-intrinsics-fast-isel-x86.ll     |  6 ++--
 .../X86/crc32-intrinsics-fast-isel-x86_64.ll  |  4 +--
 llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll |  6 ++--
 .../CodeGen/X86/crc32-intrinsics-x86_64.ll    |  4 +--
 llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll |  4 +--
 .../CodeGen/X86/movdir-intrinsic-x86_64.ll    |  2 +-
 llvm/test/CodeGen/X86/sha.ll                  | 30 +++++++++----------
 .../TableGen/X86EVEX2NonEVEXTablesEmitter.cpp |  4 ---
 .../TableGen/X86ManualEVEXCompressTables.def  |  5 +++-
 9 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
index 873986e99777d9..fe5182e5ef7319 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -29,7 +29,7 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u8:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
@@ -55,7 +55,7 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u16:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax # EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i16
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
@@ -79,7 +79,7 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u32:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index 71d955bda75235..ba5f846c22db04 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
 ;
 ; EGPR-LABEL: test_mm_crc64_u8:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; EGPR-NEXT:    crc32b %sil, %edi # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
@@ -34,7 +34,7 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
 ; EGPR-LABEL: test_mm_crc64_u64:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax # EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   ret i64 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
index 84c7f90cfe3c3d..ea4e0ffb109ce5 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -19,7 +19,7 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_32_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
@@ -42,7 +42,7 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
 ; EGPR-LABEL: crc32_32_16:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax ## EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
@@ -65,7 +65,7 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
 ; EGPR-LABEL: crc32_32_32:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
index bda26a15b277a4..af2b590b1f6b25 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_64_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
   ret i64 %tmp
@@ -31,7 +31,7 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
 ; EGPR-LABEL: crc32_64_64:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax ## EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
index 4d03510ad5d4f2..023dfb110502bc 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
@@ -18,7 +18,7 @@ define void @test_movdiri(ptr %p, i32 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore32(ptr %p, i32 %v)
@@ -42,7 +42,7 @@ define void @test_movdir64b(ptr %dst, ptr %src) {
 ;
 ; EGPR-LABEL: test_movdir64b:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; EGPR-NEXT:    movdir64b (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf8,0x3e]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
index ddd44f6d73d592..e3736e29a582c8 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
@@ -10,7 +10,7 @@ define void @test_movdiri(ptr %p, i64 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore64(ptr %p, i64 %v)
diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll
index d8fa354a391355..65222ba74023f4 100644
--- a/llvm/test/CodeGen/X86/sha.ll
+++ b/llvm/test/CodeGen/X86/sha.ll
@@ -18,7 +18,7 @@ define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1rnds4rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
@@ -38,7 +38,7 @@ define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1rnds4rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -61,7 +61,7 @@ define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1nexterr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
@@ -81,7 +81,7 @@ define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1nexterm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -104,7 +104,7 @@ define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
@@ -124,7 +124,7 @@ define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -147,7 +147,7 @@ define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
@@ -167,7 +167,7 @@ define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -198,7 +198,7 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) n
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
-; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0xd9]
 ; EGPR-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -227,7 +227,7 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwin
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
 ; EGPR-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
-; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0x17]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -251,7 +251,7 @@ define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
@@ -271,7 +271,7 @@ define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -294,7 +294,7 @@ define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
@@ -314,7 +314,7 @@ define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -338,7 +338,7 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwta
 ;
 ; EGPR-LABEL: test_sha1rnds4_zero_extend:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
diff --git a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index 701ae20d908ab5..ff8de4707873f1 100644
--- a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -203,10 +203,6 @@ class IsMatchAPX {
         RecLegacy->getValueAsBit("hasREX_W"))
       return false;
 
-    if (RecLegacy->getValueAsDef("AdSize")->getName() !=
-        RecEVEX->getValueAsDef("AdSize")->getName())
-      return false;
-
     if (RecLegacy->getValueAsDef("Form") != RecEVEX->getValueAsDef("Form"))
       return false;
 
diff --git a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
index a7ca339e006521..5bd27a774e153b 100644
--- a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
+++ b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
@@ -2,9 +2,12 @@
 #define EVEXENTRY(EVEX, LEGACY)
 #endif
 // The following entries are added manually b/c:
-//  1.  The prefix is used specially, like RAOINT, prefix could be used to
+//  1.  The prefix is used specially (e.g. movdir64b), so the prefix can
 //      identify instrs.
 //  2.  Opcode could change when promote to map4, like sha instrs.
+EVEXENTRY(MOVDIR64B32_EVEX, MOVDIR64B32)
+EVEXENTRY(MOVDIR64B64_EVEX, MOVDIR64B64)
+
 EVEXENTRY(SHA1MSG1rm_EVEX, SHA1MSG1rm)
 EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
 EVEXENTRY(SHA1MSG2rm_EVEX, SHA1MSG2rm)



More information about the llvm-commits mailing list