[llvm] [X86][MC] Compress APX Promoted instrs from evex to legacy encoding to save code size. (PR #77065)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 5 02:01:09 PST 2024


https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/77065

>From a3113dabc8fe0a37ffef11fc507597e538be35e0 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Thu, 4 Jan 2024 01:28:52 -0800
Subject: [PATCH 1/3] basic support

---
 llvm/lib/Target/X86/CMakeLists.txt            |   4 +-
 llvm/lib/Target/X86/X86.h                     |   6 +-
 ...{X86EvexToVex.cpp => X86EvexToNonEvex.cpp} |  96 ++++++----
 llvm/lib/Target/X86/X86InstrInfo.h            |   4 +
 llvm/lib/Target/X86/X86MCInstLower.cpp        |   4 +
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   4 +-
 llvm/test/CodeGen/X86/O0-pipeline.ll          |   2 +-
 .../test/CodeGen/X86/evex-to-vex-compress.mir |   2 +-
 llvm/test/CodeGen/X86/opt-pipeline.ll         |   2 +-
 llvm/utils/TableGen/CMakeLists.txt            |   2 +-
 ...r.cpp => X86EVEX2NonEVEXTablesEmitter.cpp} | 173 ++++++++++++++----
 .../TableGen/X86ManualEVEXCompressTables.def  |  22 +++
 12 files changed, 247 insertions(+), 74 deletions(-)
 rename llvm/lib/Target/X86/{X86EvexToVex.cpp => X86EvexToNonEvex.cpp} (73%)
 rename llvm/utils/TableGen/{X86EVEX2VEXTablesEmitter.cpp => X86EVEX2NonEVEXTablesEmitter.cpp} (53%)
 create mode 100644 llvm/utils/TableGen/X86ManualEVEXCompressTables.def

diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 0b7a98ad6341dd..5cd2a8e40d0d58 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -8,7 +8,7 @@ tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+tablegen(LLVM X86GenEVEX2NonEVEXTables.inc -gen-x86-EVEX2NonEVEX-tables)
 tablegen(LLVM X86GenExegesis.inc -gen-exegesis)
 tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
@@ -61,7 +61,7 @@ set(sources
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
   X86InstrInfo.cpp
-  X86EvexToVex.cpp
+  X86EvexToNonEvex.cpp
   X86LoadValueInjectionLoadHardening.cpp
   X86LoadValueInjectionRetHardening.cpp
   X86MCInstLower.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 485afbc1dfbc24..9f2c641cce3aec 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts();
 /// to another, when profitable.
 FunctionPass *createX86DomainReassignmentPass();
 
-/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX
+/// This pass replaces EVEX-encoded AVX-512 instructions with non-EVEX
 /// encoding when possible in order to reduce code size.
-FunctionPass *createX86EvexToVexInsts();
+FunctionPass *createX86EvexToNonEvexInsts();
 
 /// This pass creates the thunks for the retpoline feature.
 FunctionPass *createX86IndirectThunksPass();
@@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
 FunctionPass *createX86ArgumentStackSlotPass();
 
-void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeEvexToNonEvexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
similarity index 73%
rename from llvm/lib/Target/X86/X86EvexToVex.cpp
rename to llvm/lib/Target/X86/X86EvexToNonEvex.cpp
index c425c37b418681..7eebb51e1927ae 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
@@ -1,5 +1,6 @@
-//===- X86EvexToVex.cpp ---------------------------------------------------===//
-// Compress EVEX instructions to VEX encoding when possible to reduce code size
+//===- X86EvexToNonEvex.cpp -----------------------------------------------===//
+// Compress EVEX instructions to Non-EVEX encoding when possible to reduce code
+// size.
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,7 +17,11 @@
 /// accessed by instruction is less than 512 bits and when it does not use
 //  the xmm or the mask registers or xmm/ymm registers with indexes higher
 //  than 15.
-/// The pass applies code reduction on the generated code for AVX-512 instrs.
+//  APX promoted instrs use EVEX encoding, which lets them use r16-r31. If
+//  they do not use an EGPR, we can compress them back to legacy encoding to
+//  save code size.
+/// The pass applies code reduction on the generated code for AVX-512 instrs and
+/// APX promoted instrs.
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,34 +43,35 @@
 
 using namespace llvm;
 
-// Including the generated EVEX2VEX tables.
-struct X86EvexToVexCompressTableEntry {
+// Including the generated EVEX2NonEVEX tables.
+struct X86EvexToNonEvexCompressTableEntry {
   uint16_t EvexOpc;
-  uint16_t VexOpc;
+  uint16_t NonEvexOpc;
 
-  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
+  bool operator<(const X86EvexToNonEvexCompressTableEntry &RHS) const {
     return EvexOpc < RHS.EvexOpc;
   }
 
-  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
+  friend bool operator<(const X86EvexToNonEvexCompressTableEntry &TE,
                         unsigned Opc) {
     return TE.EvexOpc < Opc;
   }
 };
-#include "X86GenEVEX2VEXTables.inc"
+#include "X86GenEVEX2NonEVEXTables.inc"
 
-#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
-#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
+#define EVEX2NONEVEX_DESC                                                      \
+  "Compressing EVEX instrs to Non-EVEX encoding when possible"
+#define EVEX2NONEVEX_NAME "x86-evex-to-non-evex-compress"
 
-#define DEBUG_TYPE EVEX2VEX_NAME
+#define DEBUG_TYPE EVEX2NONEVEX_NAME
 
 namespace {
 
-class EvexToVexInstPass : public MachineFunctionPass {
+class EvexToNonEvexInstPass : public MachineFunctionPass {
 public:
   static char ID;
-  EvexToVexInstPass() : MachineFunctionPass(ID) {}
-  StringRef getPassName() const override { return EVEX2VEX_DESC; }
+  EvexToNonEvexInstPass() : MachineFunctionPass(ID) {}
+  StringRef getPassName() const override { return EVEX2NONEVEX_DESC; }
 
   /// Loop over all of the basic blocks, replacing EVEX instructions
   /// by equivalent VEX instructions when possible for reducing code size.
@@ -80,7 +86,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
 
 } // end anonymous namespace
 
-char EvexToVexInstPass::ID = 0;
+char EvexToNonEvexInstPass::ID = 0;
 
 static bool usesExtendedRegister(const MachineInstr &MI) {
   auto isHiRegIdx = [](unsigned Reg) {
@@ -151,15 +157,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) {
 }
 
 // Do any custom cleanup needed to finalize the conversion.
-static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
-  (void)VexOpc;
+static bool performCustomAdjustments(MachineInstr &MI, unsigned NonEvexOpc) {
+  (void)NonEvexOpc;
   unsigned Opc = MI.getOpcode();
   switch (Opc) {
   case X86::VALIGNDZ128rri:
   case X86::VALIGNDZ128rmi:
   case X86::VALIGNQZ128rri:
   case X86::VALIGNQZ128rmi: {
-    assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
+    assert((NonEvexOpc == X86::VPALIGNRrri || NonEvexOpc == X86::VPALIGNRrmi) &&
            "Unexpected new opcode!");
     unsigned Scale =
         (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
@@ -175,8 +181,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
   case X86::VSHUFI32X4Z256rri:
   case X86::VSHUFI64X2Z256rmi:
   case X86::VSHUFI64X2Z256rri: {
-    assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
-            VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
+    assert((NonEvexOpc == X86::VPERM2F128rr || NonEvexOpc == X86::VPERM2I128rr ||
+            NonEvexOpc == X86::VPERM2F128rm || NonEvexOpc == X86::VPERM2I128rm) &&
            "Unexpected new opcode!");
     MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     int64_t ImmVal = Imm.getImm();
@@ -214,6 +220,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
 static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasAVX512())
+    return false;
   // VEX format.
   // # of bytes: 0,2,3  1      1      0,1   0,1,2,4  0,1
   //  [Prefixes] [VEX]  OPCODE ModR/M [SIB] [DISP]  [IMM]
@@ -239,7 +247,7 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
 
   // Use the VEX.L bit to select the 128 or 256-bit table.
-  ArrayRef<X86EvexToVexCompressTableEntry> Table =
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table =
       (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
                                     : ArrayRef(X86EvexToVex128CompressTable);
 
@@ -252,15 +260,36 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
   if (!checkVEXInstPredicate(EvexOpc, ST))
     return false;
-  if (!performCustomAdjustments(MI, I->VexOpc))
+  if (!performCustomAdjustments(MI, I->NonEvexOpc))
     return false;
 
-  MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+  MI.setDesc(ST.getInstrInfo()->get(I->NonEvexOpc));
   MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
   return true;
 }
 
-bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
+// For APX promoted instructions, if they don't use an EGPR, we can try to use
+// legacy encoding to save code size.
+static bool CompressEVEX2LegacyImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasEGPR())
+    return false;
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table = X86EvexToLegacyCompressTable;
+  unsigned EvexOpc = MI.getOpcode();
+  const auto *I = llvm::lower_bound(Table, EvexOpc);
+  if (I == Table.end() || I->EvexOpc != EvexOpc)
+    return false;
+  unsigned NewOpc = I->NonEvexOpc;
+  for (unsigned Index = 0, Size = MI.getNumOperands(); Index < Size; Index++) {
+    const MachineOperand &Op = MI.getOperand(Index);
+    if (Op.isReg() && X86II::isApxExtendedReg(Op.getReg()))
+      return false;
+  }
+  MI.setDesc(ST.getInstrInfo()->get(NewOpc));
+  MI.setAsmPrinterFlag(X86::AC_EVEX_2_LEGACY);
+  return true;
+}
+
+bool EvexToNonEvexInstPass::runOnMachineFunction(MachineFunction &MF) {
 #ifndef NDEBUG
   // Make sure the tables are sorted.
   static std::atomic<bool> TableChecked(false);
@@ -269,28 +298,33 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
            "X86EvexToVex128CompressTable is not sorted!");
     assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
            "X86EvexToVex256CompressTable is not sorted!");
+    assert(llvm::is_sorted(X86EvexToLegacyCompressTable) &&
+           "X86EvexToLegacyCompressTable is not sorted!");
     TableChecked.store(true, std::memory_order_relaxed);
   }
 #endif
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-  if (!ST.hasAVX512())
+  if (!ST.hasAVX512() && !ST.hasEGPR())
     return false;
 
   bool Changed = false;
 
   /// Go over all basic blocks in function and replace
-  /// EVEX encoded instrs by VEX encoding when possible.
+  /// EVEX encoded instrs by VEX/Legacy encoding when possible.
   for (MachineBasicBlock &MBB : MF) {
     // Traverse the basic block.
-    for (MachineInstr &MI : MBB)
+    for (MachineInstr &MI : MBB) {
       Changed |= CompressEvexToVexImpl(MI, ST);
+      Changed |= CompressEVEX2LegacyImpl(MI, ST);
+    }
   }
 
   return Changed;
 }
 
-INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+INITIALIZE_PASS(EvexToNonEvexInstPass, EVEX2NONEVEX_NAME, EVEX2NONEVEX_DESC,
+                false, false)
 
-FunctionPass *llvm::createX86EvexToVexInsts() {
-  return new EvexToVexInstPass();
+FunctionPass *llvm::createX86EvexToNonEvexInsts() {
+  return new EvexToNonEvexInstPass();
 }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index eac8d79eb8a32a..685c033e10f49a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -31,6 +31,10 @@ namespace X86 {
 enum AsmComments {
   // For instr that was compressed from EVEX to VEX.
   AC_EVEX_2_VEX = MachineInstr::TAsmComments
+  , // For instrs that were compressed from ND to non-ND.
+  AC_ND_2_NONND = AC_EVEX_2_VEX << 1
+  , // For instrs that were compressed from EVEX to Legacy.
+  AC_EVEX_2_LEGACY = AC_ND_2_NONND << 1
 };
 
 /// Return a pair of condition code for the given predicate and whether
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1a67f61e76640..f854b109fc80e3 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2060,6 +2060,10 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   if (TM.Options.MCOptions.ShowMCEncoding) {
     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+    else if (MI->getAsmPrinterFlags() & X86::AC_ND_2_NONND)
+      OutStreamer->AddComment("ND TO non-ND Compression ", false);
+    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
+      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
   }
 
   // Add comments for values loaded from constant pool.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 5668b514d6dec0..05f1dbd63f4f1f 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializeGlobalISel(PR);
   initializeWinEHStatePassPass(PR);
   initializeFixupBWInstPassPass(PR);
-  initializeEvexToVexInstPassPass(PR);
+  initializeEvexToNonEvexInstPassPass(PR);
   initializeFixupLEAPassPass(PR);
   initializeFPSPass(PR);
   initializeX86FixupSetCCPassPass(PR);
@@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() {
     addPass(createX86FixupInstTuning());
     addPass(createX86FixupVectorConstants());
   }
-  addPass(createX86EvexToVexInsts());
+  addPass(createX86EvexToNonEvexInsts());
   addPass(createX86DiscriminateMemOpsPass());
   addPass(createX86InsertPrefetchPass());
   addPass(createX86InsertX87waitPass());
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 402645ed1e2e5d..feec8d3db27e64 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       X86 Indirect Branch Tracking
 ; CHECK-NEXT:       X86 vzeroupper inserter
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possibl
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index 06d3c1532c3eaa..928ac700ee009d 100644
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-non-evex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
 # This test verifies VEX encoding for AVX-512 instructions that use registers of low indexes and
 # do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode
 
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index fb8d2335b34106..a44e04e8ee41ed 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -205,7 +205,7 @@
 ; CHECK-NEXT:       X86 LEA Fixup
 ; CHECK-NEXT:       X86 Fixup Inst Tuning
 ; CHECK-NEXT:       X86 Fixup Vector Constants
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 071ea3bc07054b..32332c121604e7 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -81,7 +81,7 @@ add_tablegen(llvm-tblgen LLVM
   Types.cpp
   VarLenCodeEmitterGen.cpp
   X86DisassemblerTables.cpp
-  X86EVEX2VEXTablesEmitter.cpp
+  X86EVEX2NonEVEXTablesEmitter.cpp
   X86FoldTablesEmitter.cpp
   X86MnemonicTables.cpp
   X86ModRMFilters.cpp
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
similarity index 53%
rename from llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
rename to llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index c80d9a199fa3c1..701ae20d908ab5 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -1,4 +1,4 @@
-//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===//
+//=- utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp - X86 backend-*- C++ -*-//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,12 @@ using namespace X86Disassembler;
 
 namespace {
 
-class X86EVEX2VEXTablesEmitter {
+const std::map<StringRef, StringRef> ManualMap = {
+#define EVEXENTRY(EVEX, NonEVEXInstStr) {#EVEX, #NonEVEXInstStr},
+#include "X86ManualEVEXCompressTables.def"
+};
+
+class X86EVEX2NonEVEXTablesEmitter {
   RecordKeeper &Records;
   CodeGenTarget Target;
 
@@ -40,28 +45,55 @@ class X86EVEX2VEXTablesEmitter {
   std::vector<Entry> EVEX2VEX128;
   std::vector<Entry> EVEX2VEX256;
 
+  // Hold all possibly compressed APX instructions, including only ND and EGPR
+  // instructions so far.
+  std::vector<const CodeGenInstruction *> APXInsts;
+  // Hold all X86 instructions. Divided into groups with same opcodes
+  // to make the search more efficient
+  std::map<uint64_t, std::vector<const CodeGenInstruction *>> LegacyInsts;
+  // Represent EVEX to Legacy compress tables.
+  std::vector<Entry> EVEX2LegacyTable;
+
 public:
-  X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+  X86EVEX2NonEVEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Output X86 EVEX2VEX tables.
   void run(raw_ostream &OS);
 
 private:
   // Prints the given table as a C++ array of type
-  // X86EvexToVexCompressTableEntry
+  // X86EvexToNonEvexCompressTableEntry
   void printTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  // X86EVEXToLegacyCompressTableEntry
+  void printEVEX2LegacyTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  void addManualEntry(const CodeGenInstruction *EVEXInstr,
+                      const CodeGenInstruction *LegacyInstr,
+                      const char *TableName);
 };
 
-void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
+void X86EVEX2NonEVEXTablesEmitter::printTable(const std::vector<Entry> &Table,
                                           raw_ostream &OS) {
-  StringRef Size = (Table == EVEX2VEX128) ? "128" : "256";
 
-  OS << "// X86 EVEX encoded instructions that have a VEX " << Size
-     << " encoding\n"
-     << "// (table format: <EVEX opcode, VEX-" << Size << " opcode>).\n"
-     << "static const X86EvexToVexCompressTableEntry X86EvexToVex" << Size
+  StringRef TargetEnc;
+  StringRef TableName;
+  StringRef Size;
+  if (Table == EVEX2LegacyTable){
+    TargetEnc = "Legacy";
+    TableName = "X86EvexToLegacy";
+  }
+  else {
+    TargetEnc = "VEX";
+    TableName = "X86EvexToVex";
+    Size = (Table == EVEX2VEX128) ? "128" : "256";
+  }
+
+  OS << "// X86 EVEX encoded instructions that have a " << TargetEnc << " "
+     << Size << " encoding\n"
+     << "// (table format: <EVEX opcode, " << TargetEnc << Size
+     << " opcode>).\n"
+     << "static const X86EvexToNonEvexCompressTableEntry " << TableName << Size
      << "CompressTable[] = {\n"
-     << "  // EVEX scalar with corresponding VEX.\n";
+     << "  // EVEX scalar with corresponding " << TargetEnc << ".\n";
 
   // Print all entries added to the table
   for (const auto &Pair : Table) {
@@ -85,6 +117,31 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
   return Value;
 }
 
+static bool checkMatchable(const CodeGenInstruction *EVEXInst,
+                           const CodeGenInstruction *NonEVEXInst) {
+  for (unsigned I = 0, E = NonEVEXInst->Operands.size(); I < E; I++) {
+    Record *OpRec1 = EVEXInst->Operands[I].Rec;
+    Record *OpRec2 = NonEVEXInst->Operands[I].Rec;
+
+    if (OpRec1 == OpRec2)
+      continue;
+
+    if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
+      if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
+        return false;
+    } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
+      if (getMemOperandSize(OpRec1) != getMemOperandSize(OpRec2))
+        return false;
+    } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
+      if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
+        return false;
+      }
+    } else
+      return false;
+  }
+  return true;
+}
+
 // Function object - Operator() returns true if the given VEX instruction
 // matches the EVEX instruction of this object.
 class IsMatch {
@@ -121,31 +178,47 @@ class IsMatch {
     // Also for instructions that their EVEX version was upgraded to work with
     // k-registers. For example VPCMPEQBrm (xmm output register) and
     // VPCMPEQBZ128rm (k register output register).
-    for (unsigned i = 0, e = EVEXInst->Operands.size(); i < e; i++) {
-      Record *OpRec1 = EVEXInst->Operands[i].Rec;
-      Record *OpRec2 = VEXInst->Operands[i].Rec;
+    return checkMatchable(EVEXInst, VEXInst);
+  }
+};
 
-      if (OpRec1 == OpRec2)
-        continue;
+class IsMatchAPX {
+  const CodeGenInstruction *EVEXInst;
 
-      if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
-        if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
-          return false;
-      } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
-        return false;
-      } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
-        if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
-          return false;
-        }
-      } else
-        return false;
-    }
+public:
+  IsMatchAPX(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {}
+
+  bool operator()(const CodeGenInstruction *LegacyInst) {
+    Record *RecEVEX = EVEXInst->TheDef;
+    Record *RecLegacy = LegacyInst->TheDef;
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize16" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PD")
+      return false;
+
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize32" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PS")
+      return false;
+
+    if (RecEVEX->getValueAsBit("hasREX_W") !=
+        RecLegacy->getValueAsBit("hasREX_W"))
+      return false;
+
+    if (RecLegacy->getValueAsDef("AdSize")->getName() !=
+        RecEVEX->getValueAsDef("AdSize")->getName())
+      return false;
+
+    if (RecLegacy->getValueAsDef("Form") != RecEVEX->getValueAsDef("Form"))
+      return false;
+
+    if (RecLegacy->getValueAsBit("isCodeGenOnly") !=
+        RecEVEX->getValueAsBit("isCodeGenOnly"))
+      return false;
 
-    return true;
+    return checkMatchable(EVEXInst, LegacyInst);
   }
 };
 
-void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
+void X86EVEX2NonEVEXTablesEmitter::run(raw_ostream &OS) {
   emitSourceFileHeader("X86 EVEX2VEX tables", OS);
 
   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
@@ -169,6 +242,17 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
     else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B &&
              !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible"))
       EVEXInsts.push_back(Inst);
+
+    if (RI.Encoding == X86Local::EVEX && RI.OpMap == X86Local::T_MAP4 &&
+        !RI.HasEVEX_NF &&
+        !getValueFromBitsInit(
+            Def->getValueAsBitsInit("explicitOpPrefixBits"))) {
+      APXInsts.push_back(Inst);
+    } else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncNormal") {
+      uint64_t Opcode =
+          getValueFromBitsInit(Inst->TheDef->getValueAsBitsInit("Opcode"));
+      LegacyInsts[Opcode].push_back(Inst);
+    }
   }
 
   for (const CodeGenInstruction *EVEXInst : EVEXInsts) {
@@ -203,8 +287,33 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
   // Print both tables
   printTable(EVEX2VEX128, OS);
   printTable(EVEX2VEX256, OS);
+
+  for (const CodeGenInstruction *EVEXInst : APXInsts) {
+    // REV instrs should not appear before encoding optimization.
+    if (EVEXInst->TheDef->getName().ends_with("_REV"))
+      continue;
+    const CodeGenInstruction *LegacyInst = nullptr;
+    if (ManualMap.count(EVEXInst->TheDef->getName())) {
+      auto NonEVEXInstStr =
+          ManualMap.at(StringRef(EVEXInst->TheDef->getName()));
+      Record *LegacyRec = Records.getDef(NonEVEXInstStr);
+      LegacyInst = &(Target.getInstruction(LegacyRec));
+    } else {
+      uint64_t Opcode =
+          getValueFromBitsInit(EVEXInst->TheDef->getValueAsBitsInit("Opcode"));
+      auto Match = llvm::find_if(LegacyInsts[Opcode], IsMatchAPX(EVEXInst));
+      if (Match != LegacyInsts[Opcode].end())
+        LegacyInst = *Match;
+    }
+    if (LegacyInst) {
+      if (!EVEXInst->TheDef->getValueAsBit("hasEVEX_B"))
+        EVEX2LegacyTable.push_back(std::make_pair(EVEXInst, LegacyInst));
+    }
+  }
+  printTable(EVEX2LegacyTable, OS);
 }
 } // namespace
 
-static TableGen::Emitter::OptClass<X86EVEX2VEXTablesEmitter>
-    X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables");
+static TableGen::Emitter::OptClass<X86EVEX2NonEVEXTablesEmitter>
+    X("gen-x86-EVEX2NonEVEX-tables",
+      "Generate X86 EVEX to NonEVEX compress tables");
diff --git a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
new file mode 100644
index 00000000000000..a7ca339e006521
--- /dev/null
+++ b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
@@ -0,0 +1,22 @@
+#ifndef EVEXENTRY
+#define EVEXENTRY(EVEX, LEGACY)
+#endif
+// The following entries are added manually because:
+//  1.  The prefix is used specially; e.g. for RAOINT the prefix can be used
+//      to identify the instrs.
+//  2.  The opcode could change when promoted to map4, like the SHA instrs.
+EVEXENTRY(SHA1MSG1rm_EVEX, SHA1MSG1rm)
+EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
+EVEXENTRY(SHA1MSG2rm_EVEX, SHA1MSG2rm)
+EVEXENTRY(SHA1MSG2rr_EVEX, SHA1MSG2rr)
+EVEXENTRY(SHA1NEXTErm_EVEX, SHA1NEXTErm)
+EVEXENTRY(SHA1NEXTErr_EVEX, SHA1NEXTErr)
+EVEXENTRY(SHA1RNDS4rmi_EVEX, SHA1RNDS4rmi)
+EVEXENTRY(SHA1RNDS4rri_EVEX, SHA1RNDS4rri)
+EVEXENTRY(SHA256MSG1rm_EVEX, SHA256MSG1rm)
+EVEXENTRY(SHA256MSG1rr_EVEX, SHA256MSG1rr)
+EVEXENTRY(SHA256MSG2rm_EVEX, SHA256MSG2rm)
+EVEXENTRY(SHA256MSG2rr_EVEX, SHA256MSG2rr)
+EVEXENTRY(SHA256RNDS2rm_EVEX, SHA256RNDS2rm)
+EVEXENTRY(SHA256RNDS2rr_EVEX, SHA256RNDS2rr)
+#undef EVEXENTRY

>From 8d21f02c9ef42007ab487ac4f11e27ef15a65af3 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Fri, 5 Jan 2024 01:35:26 -0800
Subject: [PATCH 2/3] update tests

---
 .../X86/crc32-intrinsics-fast-isel-x86.ll     |  6 ++--
 .../X86/crc32-intrinsics-fast-isel-x86_64.ll  |  4 +--
 llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll |  6 ++--
 .../CodeGen/X86/crc32-intrinsics-x86_64.ll    |  4 +--
 llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll |  4 +--
 .../CodeGen/X86/movdir-intrinsic-x86_64.ll    |  2 +-
 llvm/test/CodeGen/X86/sha.ll                  | 30 +++++++++----------
 .../TableGen/X86EVEX2NonEVEXTablesEmitter.cpp |  4 ---
 .../TableGen/X86ManualEVEXCompressTables.def  |  5 +++-
 9 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
index 873986e99777d9..fe5182e5ef7319 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -29,7 +29,7 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u8:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
@@ -55,7 +55,7 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u16:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax # EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i16
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
@@ -79,7 +79,7 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u32:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index 71d955bda75235..ba5f846c22db04 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
 ;
 ; EGPR-LABEL: test_mm_crc64_u8:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; EGPR-NEXT:    crc32b %sil, %edi # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
@@ -34,7 +34,7 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
 ; EGPR-LABEL: test_mm_crc64_u64:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax # EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   ret i64 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
index 84c7f90cfe3c3d..ea4e0ffb109ce5 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -19,7 +19,7 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_32_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
@@ -42,7 +42,7 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
 ; EGPR-LABEL: crc32_32_16:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax ## EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
@@ -65,7 +65,7 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
 ; EGPR-LABEL: crc32_32_32:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
index bda26a15b277a4..af2b590b1f6b25 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_64_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
   ret i64 %tmp
@@ -31,7 +31,7 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
 ; EGPR-LABEL: crc32_64_64:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax ## EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
index 4d03510ad5d4f2..023dfb110502bc 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
@@ -18,7 +18,7 @@ define void @test_movdiri(ptr %p, i32 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore32(ptr %p, i32 %v)
@@ -42,7 +42,7 @@ define void @test_movdir64b(ptr %dst, ptr %src) {
 ;
 ; EGPR-LABEL: test_movdir64b:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; EGPR-NEXT:    movdir64b (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf8,0x3e]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
index ddd44f6d73d592..e3736e29a582c8 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
@@ -10,7 +10,7 @@ define void @test_movdiri(ptr %p, i64 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore64(ptr %p, i64 %v)
diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll
index d8fa354a391355..65222ba74023f4 100644
--- a/llvm/test/CodeGen/X86/sha.ll
+++ b/llvm/test/CodeGen/X86/sha.ll
@@ -18,7 +18,7 @@ define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1rnds4rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
@@ -38,7 +38,7 @@ define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1rnds4rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -61,7 +61,7 @@ define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1nexterr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
@@ -81,7 +81,7 @@ define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1nexterm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -104,7 +104,7 @@ define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
@@ -124,7 +124,7 @@ define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -147,7 +147,7 @@ define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
@@ -167,7 +167,7 @@ define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -198,7 +198,7 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) n
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
-; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0xd9]
 ; EGPR-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -227,7 +227,7 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwin
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
 ; EGPR-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
-; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0x17]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -251,7 +251,7 @@ define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
@@ -271,7 +271,7 @@ define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -294,7 +294,7 @@ define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
@@ -314,7 +314,7 @@ define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -338,7 +338,7 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwta
 ;
 ; EGPR-LABEL: test_sha1rnds4_zero_extend:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
diff --git a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index 701ae20d908ab5..ff8de4707873f1 100644
--- a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -203,10 +203,6 @@ class IsMatchAPX {
         RecLegacy->getValueAsBit("hasREX_W"))
       return false;
 
-    if (RecLegacy->getValueAsDef("AdSize")->getName() !=
-        RecEVEX->getValueAsDef("AdSize")->getName())
-      return false;
-
     if (RecLegacy->getValueAsDef("Form") != RecEVEX->getValueAsDef("Form"))
       return false;
 
diff --git a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
index a7ca339e006521..5bd27a774e153b 100644
--- a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
+++ b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
@@ -2,9 +2,12 @@
 #define EVEXENTRY(EVEX, LEGACY)
 #endif
 // The following entries are added manually b/c:
-//  1.  The prefix is used specially, like RAOINT, prefix could be used to
+//  1.  The prefix is used specially, like movdir64b, prefix could be used to
 //      identify instrs.
 //  2.  Opcode could change when promote to map4, like sha instrs.
+EVEXENTRY(MOVDIR64B32_EVEX, MOVDIR64B32)
+EVEXENTRY(MOVDIR64B64_EVEX, MOVDIR64B64)
+
 EVEXENTRY(SHA1MSG1rm_EVEX, SHA1MSG1rm)
 EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
 EVEXENTRY(SHA1MSG2rm_EVEX, SHA1MSG2rm)

>From 10e36ec6f9ed61e8f18a9ace43d62b7169cbe5e5 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Fri, 5 Jan 2024 02:00:56 -0800
Subject: [PATCH 3/3] clang format

---
 llvm/lib/Target/X86/X86EvexToNonEvex.cpp        |  6 +++---
 llvm/lib/Target/X86/X86InstrInfo.h              |  8 +++-----
 llvm/lib/Target/X86/X86MCInstLower.cpp          |  2 --
 .../TableGen/X86EVEX2NonEVEXTablesEmitter.cpp   | 17 ++++++++---------
 4 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86EvexToNonEvex.cpp b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
index 7eebb51e1927ae..40470c2129f99a 100644
--- a/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
@@ -165,7 +165,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NonEvexOpc) {
   case X86::VALIGNDZ128rmi:
   case X86::VALIGNQZ128rri:
   case X86::VALIGNQZ128rmi: {
-    assert((NonEvexOpc == X86::VPALIGNRrri || NonEvexOpc == X86::VPALIGNRrmi) &&
+    assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
            "Unexpected new opcode!");
     unsigned Scale =
         (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
@@ -181,8 +181,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NonEvexOpc) {
   case X86::VSHUFI32X4Z256rri:
   case X86::VSHUFI64X2Z256rmi:
   case X86::VSHUFI64X2Z256rri: {
-    assert((NonEvexOpc == X86::VPERM2F128rr || NonEvexOpc == X86::VPERM2I128rr ||
-            NonEvexOpc == X86::VPERM2F128rm || NonEvexOpc == X86::VPERM2I128rm) &&
+    assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
+            VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
            "Unexpected new opcode!");
     MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     int64_t ImmVal = Imm.getImm();
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 685c033e10f49a..87f4d3d72c3b72 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -30,11 +30,9 @@ namespace X86 {
 
 enum AsmComments {
   // For instr that was compressed from EVEX to VEX.
-  AC_EVEX_2_VEX = MachineInstr::TAsmComments
-  , // For instrs that was compressed from ND to non-ND.
-  AC_ND_2_NONND = AC_EVEX_2_VEX << 1
-  , // For instrs that was compressed from EVEX to Legacy.
-  AC_EVEX_2_LEGACY = AC_ND_2_NONND << 1
+  AC_EVEX_2_VEX = MachineInstr::TAsmComments,
+  // For instrs that was compressed from EVEX to Legacy.
+  AC_EVEX_2_LEGACY = AC_EVEX_2_VEX << 1
 };
 
 /// Return a pair of condition code for the given predicate and whether
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index f854b109fc80e3..b3544bb5a278dc 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2060,8 +2060,6 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   if (TM.Options.MCOptions.ShowMCEncoding) {
     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
-    else if (MI->getAsmPrinterFlags() & X86::AC_ND_2_NONND)
-      OutStreamer->AddComment("ND TO non-ND Compression ", false);
     else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
       OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
   }
diff --git a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index ff8de4707873f1..54c2564dd3d4fd 100644
--- a/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -72,16 +72,15 @@ class X86EVEX2NonEVEXTablesEmitter {
 };
 
 void X86EVEX2NonEVEXTablesEmitter::printTable(const std::vector<Entry> &Table,
-                                          raw_ostream &OS) {
+                                              raw_ostream &OS) {
 
   StringRef TargetEnc;
   StringRef TableName;
   StringRef Size;
-  if (Table == EVEX2LegacyTable){
+  if (Table == EVEX2LegacyTable) {
     TargetEnc = "Legacy";
     TableName = "X86EvexToLegacy";
-  }
-  else {
+  } else {
     TargetEnc = "VEX";
     TableName = "X86EvexToVex";
     Size = (Table == EVEX2VEX128) ? "128" : "256";
@@ -155,8 +154,8 @@ class IsMatch {
     RecognizableInstrBase EVEXRI(*EVEXInst);
     bool VEX_W = VEXRI.HasREX_W;
     bool EVEX_W = EVEXRI.HasREX_W;
-    bool VEX_WIG  = VEXRI.IgnoresW;
-    bool EVEX_WIG  = EVEXRI.IgnoresW;
+    bool VEX_WIG = VEXRI.IgnoresW;
+    bool EVEX_WIG = EVEXRI.IgnoresW;
     bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0");
 
     if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly ||
@@ -252,15 +251,15 @@ void X86EVEX2NonEVEXTablesEmitter::run(raw_ostream &OS) {
   }
 
   for (const CodeGenInstruction *EVEXInst : EVEXInsts) {
-    uint64_t Opcode = getValueFromBitsInit(EVEXInst->TheDef->
-                                           getValueAsBitsInit("Opcode"));
+    uint64_t Opcode =
+        getValueFromBitsInit(EVEXInst->TheDef->getValueAsBitsInit("Opcode"));
     // For each EVEX instruction look for a VEX match in the appropriate vector
     // (instructions with the same opcode) using function object IsMatch.
     // Allow EVEX2VEXOverride to explicitly specify a match.
     const CodeGenInstruction *VEXInst = nullptr;
     if (!EVEXInst->TheDef->isValueUnset("EVEX2VEXOverride")) {
       StringRef AltInstStr =
-        EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride");
+          EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride");
       Record *AltInstRec = Records.getDef(AltInstStr);
       assert(AltInstRec && "EVEX2VEXOverride instruction not found!");
       VEXInst = &Target.getInstruction(AltInstRec);



More information about the llvm-commits mailing list