[llvm] [X86] Support lowering for APX promoted BMI instructions. (PR #77433)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 10 22:38:50 PST 2024
https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/77433
From d8d452ae3aa063b9c312de195b0afcabb594d035 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 9 Jan 2024 00:45:01 -0800
Subject: [PATCH 1/3] [X86] Support lowering for APX promoted BMI instructions.
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 44 +-
llvm/lib/Target/X86/X86InstrArithmetic.td | 13 +-
llvm/lib/Target/X86/X86InstrCompiler.td | 60 ++-
llvm/lib/Target/X86/X86InstrMisc.td | 61 ++-
llvm/lib/Target/X86/X86InstrShiftRotate.td | 82 +++-
.../X86/bmi-intrinsics-fast-isel-x86_64.ll | 78 +++
.../CodeGen/X86/bmi-intrinsics-fast-isel.ll | 98 +++-
llvm/test/CodeGen/X86/bmi-x86_64.ll | 56 +++
llvm/test/CodeGen/X86/bmi.ll | 455 ++++++++++++++++++
llvm/test/CodeGen/X86/bmi2-x86_64.ll | 62 +++
llvm/test/CodeGen/X86/bmi2.ll | 121 +++++
llvm/test/CodeGen/X86/shift-bmi2.ll | 115 +++++
12 files changed, 1222 insertions(+), 23 deletions(-)
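
The ISelDAGToDAG change below keys each opcode choice on `Subtarget->hasEGPR()` through a short-lived `GET_EGPR_IF_ENABLED` macro. A minimal, self-contained sketch of that selection pattern is shown here, using placeholder opcode and subtarget types rather than the actual LLVM definitions:

```cpp
#include <cstdio>

// Stand-in for the patch's opcode constants (the real ones live in the
// generated X86 instruction enum); the _EVEX variants are the APX-promoted
// encodings that can address the extended GPRs r16-r31.
enum Opcode { BZHI32rr, BZHI32rr_EVEX, BZHI64rr, BZHI64rr_EVEX };

struct Subtarget {
  bool EGPR = false;
  bool hasEGPR() const { return EGPR; }
};

// Mirrors the patch's GET_EGPR_IF_ENABLED macro: prefer the EVEX-promoted
// form only when extended GPRs are available.
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)

static Opcode selectBZHI(const Subtarget &ST, bool Is64Bit) {
  return Is64Bit ? GET_EGPR_IF_ENABLED(BZHI64rr)
                 : GET_EGPR_IF_ENABLED(BZHI32rr);
}

int main() {
  Subtarget Legacy, APX{true};
  std::printf("%d %d\n", selectBZHI(Legacy, true), selectBZHI(APX, true));
}
```

When EGPR is available the EVEX-promoted form is chosen, leaving register allocation free to use the APX extended registers; otherwise the existing VEX-encoded opcode is kept, which is also why the TableGen patterns below are duplicated under NoEGPR/HasEGPR predicates.
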
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 53ce720be2da4c..8d519f181549ff 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,14 +4087,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
// Let's perform the mask first, and apply shift later. Note that we need to
// widen the mask to account for the fact that we'll apply shift afterwards!
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
- ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
- MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
} else {
@@ -4109,12 +4112,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
} else {
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
// BMI requires the immediate to placed in a register.
- ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
- MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
}
}
+#undef GET_EGPR_IF_ENABLED
MachineSDNode *NewNode;
SDValue Input = N0->getOperand(0);
@@ -5482,26 +5488,32 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
case MVT::i32:
- Opc = UseMULXHi ? X86::MULX32Hrr :
- UseMULX ? X86::MULX32rr :
- IsSigned ? X86::IMUL32r : X86::MUL32r;
- MOpc = UseMULXHi ? X86::MULX32Hrm :
- UseMULX ? X86::MULX32rm :
- IsSigned ? X86::IMUL32m : X86::MUL32m;
+ Opc = UseMULXHi ? X86::MULX32Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
+ : IsSigned ? X86::IMUL32r
+ : X86::MUL32r;
+ MOpc = UseMULXHi ? X86::MULX32Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
+ : IsSigned ? X86::IMUL32m
+ : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
- Opc = UseMULXHi ? X86::MULX64Hrr :
- UseMULX ? X86::MULX64rr :
- IsSigned ? X86::IMUL64r : X86::MUL64r;
- MOpc = UseMULXHi ? X86::MULX64Hrm :
- UseMULX ? X86::MULX64rm :
- IsSigned ? X86::IMUL64m : X86::MUL64m;
+ Opc = UseMULXHi ? X86::MULX64Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
+ : IsSigned ? X86::IMUL64r
+ : X86::MUL64r;
+ MOpc = UseMULXHi ? X86::MULX64Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
+ : IsSigned ? X86::IMUL64m
+ : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
break;
+#undef GET_EGPR_IF_ENABLED
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 76b0fe5f5cad18..289141ce6c33f5 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1338,7 +1338,7 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI], AddedComplexity = -6 in {
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
def : Pat<(and (not GR32:$src1), GR32:$src2),
(ANDN32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
@@ -1349,6 +1349,17 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
(ANDN64rm GR64:$src1, addr:$src2)>;
}
+let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 422391a6e02ae0..3510bdeeff4c09 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1864,7 +1864,7 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
let AddedComplexity = 1 in {
def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
(SARX32rr GR32:$src1,
@@ -1922,6 +1922,64 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
Instruction BTS, Instruction BTC,
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 753cf62392a17b..a51720d738f47e 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,7 +1241,7 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI, NoEGPR] in {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
@@ -1278,6 +1278,39 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
+let Predicates = [HasBMI, HasEGPR] in {
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+}
+
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1357,7 @@ def AndMask64 : ImmLeaf<i64, [{
}]>;
// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BEXTR64rr GR64:$src,
(SUBREG_TO_REG (i64 0),
@@ -1335,8 +1368,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
}
+let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTR64rr_EVEX GR64:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTR64rm_EVEX addr:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
// Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
+let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BZHI64rr GR64:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1391,17 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
+let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BZHI64rr_EVEX GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BZHI64rm_EVEX addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index f951894db1890c..e225fe6950e3da 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,7 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
@@ -983,6 +983,86 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 10 in {
+ def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
(ROL8ri GR8:$src1, relocImm:$src2)>;
def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index d704f38307fcb8..5b7bb1ca97b5ca 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
@@ -14,6 +15,13 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -24,6 +32,11 @@ define i64 @test__bextr_u64(i64 %a0, i64 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
ret i64 %res
}
@@ -35,6 +48,13 @@ define i64 @test__blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -46,6 +66,12 @@ define i64 @test__blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = xor i64 %a0, %dec
ret i64 %res
@@ -57,6 +83,12 @@ define i64 @test__blsr_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = and i64 %a0, %dec
ret i64 %res
@@ -67,6 +99,11 @@ define i64 @test__tzcnt_u64(i64 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i64 %a0, 0
%cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false)
ret i64 %cttz
@@ -83,6 +120,13 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -98,6 +142,16 @@ define i64 @test_bextr_u64(i64 %a0, i32 %a1, i32 %a2) {
; X64-NEXT: movl %edx, %eax
; X64-NEXT: bextrq %rax, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08]
+; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2]
+; EGPR-NEXT: movl %edx, %eax # encoding: [0x89,0xd0]
+; EGPR-NEXT: bextrq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and1 = and i32 %a1, 255
%and2 = and i32 %a2, 255
%shl = shl i32 %and2, 8
@@ -114,6 +168,13 @@ define i64 @test_blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -125,6 +186,12 @@ define i64 @test_blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = xor i64 %a0, %dec
ret i64 %res
@@ -136,6 +203,12 @@ define i64 @test_blsr_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i64 %a0, 1
%res = and i64 %a0, %dec
ret i64 %res
@@ -146,6 +219,11 @@ define i64 @test_tzcnt_u64(i64 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntq %rdi, %rax # encoding: [0xf3,0x48,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i64 %a0, 0
%cttz = call i64 @llvm.cttz.i64(i64 %a0, i1 false)
ret i64 %cttz
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index 58b894a9da8b6f..7dbd1bba63861e 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
-
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
;
@@ -23,6 +23,14 @@ define i16 @test__tzcnt_u16(i16 %a0) {
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u16:
+; EGPR: # %bb.0:
+; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00]
+; EGPR-NEXT: # imm = 0x10000
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: # kill: def $ax killed $ax killed $eax
+; EGPR-NEXT: retq # encoding: [0xc3]
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
%cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
@@ -43,6 +51,13 @@ define i32 @test__andn_u32(i32 %a0, i32 %a1) {
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff]
+; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i32 %a0, -1
%res = and i32 %xor, %a1
ret i32 %res
@@ -59,6 +74,11 @@ define i32 @test__bextr_u32(i32 %a0, i32 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
ret i32 %res
}
@@ -78,6 +98,13 @@ define i32 @test__blsi_u32(i32 %a0) {
; X64-NEXT: subl %edi, %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i32 0, %a0
%res = and i32 %a0, %neg
ret i32 %res
@@ -97,6 +124,13 @@ define i32 @test__blsmsk_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = xor i32 %a0, %dec
ret i32 %res
@@ -116,6 +150,13 @@ define i32 @test__blsr_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = and i32 %a0, %dec
ret i32 %res
@@ -131,6 +172,11 @@ define i32 @test__tzcnt_u32(i32 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__tzcnt_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i32 %a0, 0
%cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
ret i32 %cttz
@@ -155,6 +201,14 @@ define i16 @test_tzcnt_u16(i16 %a0) {
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u16:
+; EGPR: # %bb.0:
+; EGPR-NEXT: orl $65536, %edi # encoding: [0x81,0xcf,0x00,0x00,0x01,0x00]
+; EGPR-NEXT: # imm = 0x10000
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: # kill: def $ax killed $ax killed $eax
+; EGPR-NEXT: retq # encoding: [0xc3]
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
%cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 false)
@@ -175,6 +229,13 @@ define i32 @test_andn_u32(i32 %a0, i32 %a1) {
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_andn_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: xorl $-1, %eax # encoding: [0x83,0xf0,0xff]
+; EGPR-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i32 %a0, -1
%res = and i32 %xor, %a1
ret i32 %res
@@ -200,6 +261,15 @@ define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) {
; X64-NEXT: orl %esi, %edx
; X64-NEXT: bextrl %edx, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_bextr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $255, %esi # encoding: [0x81,0xe6,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: andl $255, %edx # encoding: [0x81,0xe2,0xff,0x00,0x00,0x00]
+; EGPR-NEXT: shll $8, %edx # encoding: [0xc1,0xe2,0x08]
+; EGPR-NEXT: orl %esi, %edx # encoding: [0x09,0xf2]
+; EGPR-NEXT: bextrl %edx, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x68,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and1 = and i32 %a1, 255
%and2 = and i32 %a2, 255
%shl = shl i32 %and2, 8
@@ -223,6 +293,13 @@ define i32 @test_blsi_u32(i32 %a0) {
; X64-NEXT: subl %edi, %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsi_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subl %edi, %eax # encoding: [0x29,0xf8]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i32 0, %a0
%res = and i32 %a0, %neg
ret i32 %res
@@ -242,6 +319,13 @@ define i32 @test_blsmsk_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsmsk_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: xorl %edi, %eax # encoding: [0x31,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = xor i32 %a0, %dec
ret i32 %res
@@ -261,6 +345,13 @@ define i32 @test_blsr_u32(i32 %a0) {
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_blsr_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal -1(%rdi), %eax # encoding: [0x8d,0x47,0xff]
+; EGPR-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%dec = sub i32 %a0, 1
%res = and i32 %a0, %dec
ret i32 %res
@@ -276,6 +367,11 @@ define i32 @test_tzcnt_u32(i32 %a0) {
; X64: # %bb.0:
; X64-NEXT: tzcntl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test_tzcnt_u32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: tzcntl %edi, %eax # encoding: [0xf3,0x0f,0xbc,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%cmp = icmp ne i32 %a0, 0
%cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 false)
ret i32 %cttz
diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll
index df180d0f0235ce..aa571531c0c6aa 100644
--- a/llvm/test/CodeGen/X86/bmi-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
@@ -11,6 +12,11 @@ define i64 @bextr64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -28,6 +34,14 @@ define i64 @bextr64b(i64 %x) uwtable ssp {
; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64b:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04]
+; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFFF
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i64 %x, 4
%2 = and i64 %1, 4095
ret i64 %2
@@ -40,6 +54,12 @@ define i64 @bextr64_subreg(i64 %x) uwtable ssp {
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movzbl %ah, %eax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64_subreg:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i64 %x, 8
%2 = and i64 %1, 255
ret i64 %2
@@ -58,6 +78,14 @@ define i64 @bextr64b_load(ptr %x) {
; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04
; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64b_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04]
+; EGPR-NEXT: andl $4095, %eax # encoding: [0x25,0xff,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFFF
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = load i64, ptr %x, align 8
%2 = lshr i64 %1, 4
%3 = and i64 %2, 4095
@@ -71,6 +99,12 @@ define i64 @bextr64c(i64 %x, i32 %y) {
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bextr64c:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp0 = sext i32 %y to i64
%tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0)
ret i64 %tmp1
@@ -96,6 +130,13 @@ define i64 @bextr64d(i64 %a) {
; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64d:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00]
+; EGPR-NEXT: bzhiq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0xc7]
+; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i64 %a, 2
%and = and i64 %shr, 8589934591
@@ -123,6 +164,13 @@ define i64 @bextr64d_load(ptr %aptr) {
; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102
; BEXTR-FAST-NEXT: bextrq %rax, (%rdi), %rax
; BEXTR-FAST-NEXT: retq
+;
+; EGPR-LABEL: bextr64d_load:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $35, %eax # encoding: [0xb8,0x23,0x00,0x00,0x00]
+; EGPR-NEXT: bzhiq %rax, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf5,0x07]
+; EGPR-NEXT: shrq $2, %rax # encoding: [0x48,0xc1,0xe8,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%a = load i64, ptr %aptr, align 8
%shr = lshr i64 %a, 2
@@ -137,6 +185,14 @@ define i64 @non_bextr64(i64 %x) {
; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: non_bextr64:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: shrq $2, %rdi # encoding: [0x48,0xc1,0xef,0x02]
+; EGPR-NEXT: movabsq $8589934590, %rax # encoding: [0x48,0xb8,0xfe,0xff,0xff,0xff,0x01,0x00,0x00,0x00]
+; EGPR-NEXT: # imm = 0x1FFFFFFFE
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i64 %x, 2
%and = and i64 %shr, 8589934590
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index e4e33c99a6b88a..2683fab59ad1bc 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define i32 @andn32(i32 %x, i32 %y) {
; X86-LABEL: andn32:
@@ -17,6 +18,11 @@ define i32 @andn32(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y, %tmp1
ret i32 %tmp2
@@ -34,6 +40,11 @@ define i32 @andn32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: andnl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y1, %tmp1
@@ -53,6 +64,11 @@ define i64 @andn64(i64 %x, i64 %y) {
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp1 = xor i64 %x, -1
%tmp2 = and i64 %tmp1, %y
ret i64 %tmp2
@@ -72,6 +88,13 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%notx = xor i32 %x, -1
%and = and i32 %notx, %y
%cmp = icmp eq i32 %and, 0
@@ -92,6 +115,13 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp1:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp eq i32 %and, %y
ret i1 %cmp
@@ -110,6 +140,13 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ne i32 %and, %y
ret i1 %cmp
@@ -128,6 +165,13 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp3:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp eq i32 %y, %and
ret i1 %cmp
@@ -146,6 +190,13 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp4:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x40,0xf2,0xc6]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ne i32 %y, %and
ret i1 %cmp
@@ -168,6 +219,13 @@ define i1 @and_cmp_const(i32 %x) {
; X64-NEXT: testb $43, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_const:
+; EGPR: # %bb.0:
+; EGPR-NEXT: notl %edi # encoding: [0xf7,0xd7]
+; EGPR-NEXT: testb $43, %dil # encoding: [0x40,0xf6,0xc7,0x2b]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, 43
%cmp = icmp eq i32 %and, 43
ret i1 %cmp
@@ -188,6 +246,12 @@ define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setae %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_const_power_of_two:
+; EGPR: # %bb.0:
+; EGPR-NEXT: btl %esi, %edi # encoding: [0x0f,0xa3,0xf7]
+; EGPR-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 1, %y
%and = and i32 %x, %shl
%cmp = icmp ne i32 %and, %shl
@@ -214,6 +278,15 @@ define i32 @and_cmp_not_one_use(i32 %x) {
; X64-NEXT: sete %al
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: and_cmp_not_one_use:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $37, %edi # encoding: [0x83,0xe7,0x25]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: cmpl $37, %edi # encoding: [0x83,0xff,0x25]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, 37
%cmp = icmp eq i32 %and, 37
%ext = zext i1 %cmp to i32
@@ -238,6 +311,13 @@ define i1 @not_an_andn1(i32 %x, i32 %y) {
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setg %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: not_an_andn1:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7]
+; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe]
+; EGPR-NEXT: setg %al # encoding: [0x0f,0x9f,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %x, %y
%cmp = icmp sgt i32 %y, %and
ret i1 %cmp
@@ -259,6 +339,13 @@ define i1 @not_an_andn2(i32 %x, i32 %y) {
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setbe %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: not_an_andn2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl %esi, %edi # encoding: [0x21,0xf7]
+; EGPR-NEXT: cmpl %edi, %esi # encoding: [0x39,0xfe]
+; EGPR-NEXT: setbe %al # encoding: [0x0f,0x96,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%and = and i32 %y, %x
%cmp = icmp ule i32 %y, %and
ret i1 %cmp
@@ -281,6 +368,13 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_swap_ops:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc0,0xf2,0xc6]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%notx = xor i64 %x, -1
%and = and i64 %y, %notx
%cmp = icmp eq i64 %and, 0
@@ -303,6 +397,13 @@ define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X64-NEXT: testb %sil, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_i8:
+; EGPR: # %bb.0:
+; EGPR-NEXT: notb %sil # encoding: [0x40,0xf6,0xd6]
+; EGPR-NEXT: testb %sil, %dil # encoding: [0x40,0x84,0xf7]
+; EGPR-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%noty = xor i8 %y, -1
%and = and i8 %x, %noty
%cmp = icmp eq i8 %and, 0
@@ -323,6 +424,13 @@ define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
; X64-NEXT: andnl %edi, %esi, %eax
; X64-NEXT: setle %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: andn_cmp_i32_overflow:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andnl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf2,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%noty = xor i32 %y, -1
%and = and i32 %x, %noty
%cmp = icmp slt i32 %and, 1
@@ -342,6 +450,11 @@ define i32 @bextr32(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
ret i32 %tmp
}
@@ -358,6 +471,11 @@ define i32 @bextr32_load(ptr %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -389,6 +507,13 @@ define i32 @bextr32b(i32 %x) uwtable ssp {
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT: retq
+;
+; EGPR-LABEL: bextr32b:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC04
+; EGPR-NEXT: bextrl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i32 %x, 4
%2 = and i32 %1, 4095
ret i32 %2
@@ -406,6 +531,12 @@ define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movzbl %ah, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32_subreg:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: movzbl %ah, %eax # encoding: [0x0f,0xb6,0xc4]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = lshr i32 %x, 8
%2 = and i32 %1, 255
ret i32 %2
@@ -439,6 +570,13 @@ define i32 @bextr32b_load(ptr %x) uwtable ssp {
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT: retq
+;
+; EGPR-LABEL: bextr32b_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $3076, %eax # encoding: [0xb8,0x04,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC04
+; EGPR-NEXT: bextrl %eax, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = load i32, ptr %x
%2 = lshr i32 %1, 4
%3 = and i32 %2, 4095
@@ -457,6 +595,11 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bextr32c:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp0 = sext i16 %y to i32
%tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
ret i32 %tmp1
@@ -476,6 +619,13 @@ define i32 @non_bextr32(i32 %x) {
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $111, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: non_bextr32:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: shrl $2, %eax # encoding: [0xc1,0xe8,0x02]
+; EGPR-NEXT: andl $111, %eax # encoding: [0x83,0xe0,0x6f]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%shr = lshr i32 %x, 2
%and = and i32 %shr, 111
@@ -492,6 +642,11 @@ define i32 @blsi32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -508,6 +663,11 @@ define i32 @blsi32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsil (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x1f]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 0, %x1
%tmp2 = and i32 %x1, %tmp
@@ -529,6 +689,13 @@ define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -552,6 +719,14 @@ define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -577,6 +752,14 @@ define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsil %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xdf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 0, %a
%t1 = and i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -606,6 +789,11 @@ define i64 @blsi64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -638,6 +826,13 @@ define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -672,6 +867,14 @@ define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -707,6 +910,14 @@ define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsiq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xdf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 0, %a
%t1 = and i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -724,6 +935,11 @@ define i32 @blsmsk32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = xor i32 %x, %tmp
ret i32 %tmp2
@@ -740,6 +956,11 @@ define i32 @blsmsk32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskl (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x17]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 %x1, 1
%tmp2 = xor i32 %x1, %tmp
@@ -761,6 +982,13 @@ define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xd7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -784,6 +1012,13 @@ define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -807,6 +1042,14 @@ define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsmskl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xd7]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = xor i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -836,6 +1079,11 @@ define i64 @blsmsk64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = xor i64 %tmp, %x
ret i64 %tmp2
@@ -868,6 +1116,13 @@ define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsmskq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xd7]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -902,6 +1157,13 @@ define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -937,6 +1199,14 @@ define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsmsk64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsmskq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xd7]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = xor i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -954,6 +1224,11 @@ define i32 @blsr32(i32 %x) {
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
ret i32 %tmp2
@@ -970,6 +1245,11 @@ define i32 @blsr32_load(ptr %x) {
; X64: # %bb.0:
; X64-NEXT: blsrl (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0x0f]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = sub i32 %x1, 1
%tmp2 = and i32 %x1, %tmp
@@ -991,6 +1271,13 @@ define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -1014,6 +1301,14 @@ define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp eq i32 %t1, 0
@@ -1037,6 +1332,14 @@ define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: blsrl %edi, %ecx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x70,0xf3,0xcf]
+; EGPR-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; EGPR-NEXT: cmovgl %edx, %eax # encoding: [0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i32 %a, 1
%t1 = and i32 %t0, %a
%t2 = icmp sle i32 %t1, 0
@@ -1066,6 +1369,11 @@ define i64 @blsr64(i64 %x) {
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = and i64 %tmp, %x
ret i64 %tmp2
@@ -1098,6 +1406,13 @@ define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_z:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -1132,6 +1447,14 @@ define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_z2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovneq %rdx, %rax # encoding: [0x48,0x0f,0x45,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp eq i64 %t1, 0
@@ -1167,6 +1490,14 @@ define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_sle:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
+; EGPR-NEXT: blsrq %rdi, %rcx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf0,0xf3,0xcf]
+; EGPR-NEXT: testq %rcx, %rcx # encoding: [0x48,0x85,0xc9]
+; EGPR-NEXT: cmovgq %rdx, %rax # encoding: [0x48,0x0f,0x4f,0xc2]
+; EGPR-NEXT: retq # encoding: [0xc3]
%t0 = sub i64 %a, 1
%t1 = and i64 %t0, %a
%t2 = icmp sle i64 %t1, 0
@@ -1189,6 +1520,12 @@ define i64 @blsr_disguised_constant(i64 %x) {
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_constant:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a1 = and i64 %x, 65535
%a2 = add i64 %x, 65535
%r = and i64 %a1, %a2
@@ -1211,6 +1548,12 @@ define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr_disguised_shrunk_add:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrq $48, %rdi # encoding: [0x48,0xc1,0xef,0x30]
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a = lshr i64 %x, 48
%b = add i64 %a, -1
%c = and i64 %b, %a
@@ -1234,6 +1577,16 @@ define void @pr40060(i32, i32) {
; X64-NEXT: jns bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr40060:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf7,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: jns bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x79,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
%4 = icmp sgt i32 %3, -1
br i1 %4, label %5, label %6
@@ -1274,6 +1627,23 @@ define i32 @blsr32_branch(i32 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrl %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB53_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB53_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB53_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 %x, 1
%tmp2 = and i32 %x, %tmp
%cmp = icmp eq i32 %tmp2, 0
@@ -1329,6 +1699,23 @@ define i64 @blsr64_branch(i64 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsr64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsrq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xcf]
+; EGPR-NEXT: jne .LBB54_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB54_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB54_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 %x, 1
%tmp2 = and i64 %x, %tmp
%cmp = icmp eq i64 %tmp2, 0
@@ -1369,6 +1756,23 @@ define i32 @blsi32_branch(i32 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi32_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsil %edi, %ebx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x60,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB55_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB55_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB55_2:
+; EGPR-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
%cmp = icmp eq i32 %tmp2, 0
@@ -1424,6 +1828,23 @@ define i64 @blsi64_branch(i64 %x) {
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi64_branch:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbx, -16
+; EGPR-NEXT: blsiq %rdi, %rbx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe0,0xf3,0xdf]
+; EGPR-NEXT: jne .LBB56_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB56_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: callq bar # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: .LBB56_2:
+; EGPR-NEXT: movq %rbx, %rax # encoding: [0x48,0x89,0xd8]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: .cfi_def_cfa_offset 8
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp2 = and i64 %x, %tmp
%cmp = icmp eq i64 %tmp2, 0
@@ -1450,6 +1871,16 @@ define void @pr42118_i32(i32 %x) {
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrl %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xcf]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i32 0, %x
%tmp1 = and i32 %tmp, %x
%cmp = icmp eq i32 %tmp1, %x
@@ -1493,6 +1924,16 @@ define void @pr42118_i64(i64 %x) {
; X64-NEXT: je bar # TAILCALL
; X64-NEXT: # %bb.1:
; X64-NEXT: retq
+;
+; EGPR-LABEL: pr42118_i64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsrq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xcf]
+; EGPR-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; EGPR-NEXT: je bar # TAILCALL
+; EGPR-NEXT: # encoding: [0x74,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1:
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sub i64 0, %x
%tmp1 = and i64 %tmp, %x
%cmp = icmp eq i64 %tmp1, %x
@@ -1522,6 +1963,13 @@ define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsil %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0xf3,0xdf]
+; EGPR-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; EGPR-NEXT: cmovel %esi, %eax # encoding: [0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i32 %x, 0
%sub = sub nsw i32 0, %x
%and = and i32 %sub, %x
@@ -1560,6 +2008,13 @@ define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: blsi_cflag_64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: blsiq %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf8,0xf3,0xdf]
+; EGPR-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; EGPR-NEXT: cmoveq %rsi, %rax # encoding: [0x48,0x0f,0x44,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tobool = icmp eq i64 %x, 0
%sub = sub nsw i64 0, %x
%and = and i64 %sub, %x
diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
index 214c4eeb1f4c59..fa1c67986e11f2 100644
--- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll
@@ -1,11 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,egpr --show-mc-encoding | FileCheck %s --check-prefixes=EGPR
define i64 @bzhi64(i64 %x, i64 %y) {
; CHECK-LABEL: bzhi64:
; CHECK: # %bb.0:
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -15,6 +21,11 @@ define i64 @bzhi64_load(ptr %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: bzhiq %rsi, (%rdi), %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: bzhi64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhiq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i64, ptr %x
%tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x1, i64 %y)
ret i64 %tmp
@@ -27,6 +38,11 @@ define i64 @pdep64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pdepq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -36,6 +52,11 @@ define i64 @pdep64_load(i64 %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pdepq (%rsi), %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y1)
ret i64 %tmp
@@ -48,6 +69,14 @@ define i64 @pdep64_anyext(i32 %x) {
; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; CHECK-NEXT: pdepq %rax, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pdep64_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pdepq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = sext i32 %x to i64
%tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x1, i64 6148914691236517205)
ret i64 %tmp
@@ -60,6 +89,11 @@ define i64 @pext64(i64 %x, i64 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pextq %rsi, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
ret i64 %tmp
}
@@ -69,6 +103,11 @@ define i64 @pext64_load(i64 %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: pextq (%rsi), %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextq (%rsi), %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y1)
ret i64 %tmp
@@ -80,6 +119,13 @@ define i64 @pext64_knownbits(i64 %x, i64 %y) {
; CHECK-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; CHECK-NEXT: pextq %rax, %rdi, %rax
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: pext64_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movabsq $6148914691236517205, %rax # encoding: [0x48,0xb8,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x5555555555555555
+; EGPR-NEXT: pextq %rax, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 6148914691236517205)
%tmp2 = and i64 %tmp, 4294967295
ret i64 %tmp2
@@ -95,6 +141,14 @@ define i64 @mulx64(i64 %x, i64 %y, ptr %p) {
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
; CHECK-NEXT: movq %rdx, (%rcx)
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq %rsi, %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0xd6]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = zext i64 %x to i128
%y1 = zext i64 %y to i128
%r1 = mul i128 %x1, %y1
@@ -113,6 +167,14 @@ define i64 @mulx64_load(i64 %x, ptr %y, ptr %p) {
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
; CHECK-NEXT: movq %rdx, (%rcx)
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: mulx64_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdx, %rcx # encoding: [0x48,0x89,0xd1]
+; EGPR-NEXT: movq %rdi, %rdx # encoding: [0x48,0x89,0xfa]
+; EGPR-NEXT: mulxq (%rsi), %rax, %rdx # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfb,0xf6,0x16]
+; EGPR-NEXT: movq %rdx, (%rcx) # encoding: [0x48,0x89,0x11]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i64, ptr %y
%x2 = zext i64 %x to i128
%y2 = zext i64 %y1 to i128
diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll
index 24e38cfeb704df..cabeebb0c3f366 100644
--- a/llvm/test/CodeGen/X86/bmi2.ll
+++ b/llvm/test/CodeGen/X86/bmi2.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define i32 @bzhi32(i32 %x, i32 %y) {
; X86-LABEL: bzhi32:
@@ -16,6 +17,12 @@ define i32 @bzhi32(i32 %x, i32 %y) {
; X64-NEXT: addl %edi, %edi
; X64-NEXT: bzhil %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -33,6 +40,11 @@ define i32 @bzhi32_load(ptr %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: bzhil %esi, (%rdi), %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = load i32, ptr %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
@@ -52,6 +64,13 @@ define i1 @bzhi32_overflow(i32 %x, i32 %y) {
; X64-NEXT: bzhil %esi, %edi, %eax
; X64-NEXT: setle %al
; X64-NEXT: retq
+;
+; EGPR-LABEL: bzhi32_overflow:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
+; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
%cmp = icmp slt i32 %tmp, 1
ret i1 %cmp
@@ -73,6 +92,12 @@ define i32 @pdep32(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %esi
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = add i32 %y, %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -90,6 +115,11 @@ define i32 @pdep32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: pdepl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -108,6 +138,13 @@ define i32 @pdep32_anyext(i16 %x) {
; X64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_anyext:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $-1431655766, %eax # encoding: [0xb8,0xaa,0xaa,0xaa,0xaa]
+; EGPR-NEXT: # imm = 0xAAAAAAAA
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = sext i16 %x to i32
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
ret i32 %tmp
@@ -126,6 +163,13 @@ define i32 @pdep32_demandedbits(i32 %x) {
; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 1431655765
ret i32 %tmp2
@@ -144,6 +188,12 @@ define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: andl $128, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $128, %eax # encoding: [0x25,0x80,0x00,0x00,0x00]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = and i32 %x, 255
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
%tmp3 = and i32 %tmp2, 128
@@ -164,6 +214,13 @@ define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; EGPR-NEXT: # imm = 0x8000
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sext i16 %y to i32
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
%tmp3 = and i32 %tmp2, 32768
@@ -184,6 +241,12 @@ define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_demandedbits_mask2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = sext i16 %y to i32
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
%tmp3 = and i32 %tmp2, 65535
@@ -205,6 +268,14 @@ define i32 @pdep32_knownbits(i32 %x) {
; X64-NEXT: pdepl %eax, %edi, %eax
; X64-NEXT: imull %eax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 1431655765
%tmp3 = mul i32 %tmp, %tmp2
@@ -226,6 +297,13 @@ define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
; X64-NEXT: pdepl %esi, %edi, %eax
; X64-NEXT: imull %eax, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pdep32_knownbits2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff]
+; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
+; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = and i32 %x, -256
%tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
%tmp3 = and i32 %tmp2, -256
@@ -249,6 +327,12 @@ define i32 @pext32(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %esi
; X64-NEXT: pextl %esi, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
+; EGPR-NEXT: pextl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = add i32 %y, %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -266,6 +350,11 @@ define i32 @pext32_load(i32 %x, ptr %y) {
; X64: # %bb.0:
; X64-NEXT: pextl (%rsi), %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pextl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0x06]
+; EGPR-NEXT: retq # encoding: [0xc3]
%y1 = load i32, ptr %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
@@ -284,6 +373,13 @@ define i32 @pext32_knownbits(i32 %x) {
; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: pextl %eax, %edi, %eax
; X64-NEXT: retq
+;
+; EGPR-LABEL: pext32_knownbits:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
+; EGPR-NEXT: # imm = 0x55555555
+; EGPR-NEXT: pextl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
%tmp2 = and i32 %tmp, 65535
ret i32 %tmp2
@@ -315,6 +411,19 @@ define i32 @mulx32(i32 %x, i32 %y, ptr %p) {
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
+; EGPR-NEXT: leal (%rsi,%rsi), %eax # encoding: [0x8d,0x04,0x36]
+; EGPR-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%y1 = add i32 %y, %y
%x2 = zext i32 %x1 to i64
@@ -349,6 +458,18 @@ define i32 @mulx32_load(i32 %x, ptr %y, ptr %p) {
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: mulx32_load:
+; EGPR: # %bb.0:
+; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
+; EGPR-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
+; EGPR-NEXT: movl (%rsi), %ecx # encoding: [0x8b,0x0e]
+; EGPR-NEXT: imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1]
+; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
+; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
+; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
+; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
+; EGPR-NEXT: retq # encoding: [0xc3]
%x1 = add i32 %x, %x
%y1 = load i32, ptr %y
%x2 = zext i32 %x1 to i64
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index db00e1c49dca7b..bb0213891c9768 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 -mattr=+egpr --show-mc-encoding < %s | FileCheck --check-prefix=EGPR %s
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32:
@@ -13,6 +14,11 @@ define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 %x, %shamt
ret i32 %shl
}
@@ -29,6 +35,12 @@ define i32 @shl32i(i32 %x) nounwind uwtable readnone {
; BMI264-NEXT: movl %edi, %eax
; BMI264-NEXT: shll $5, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i32 %x, 5
ret i32 %shl
}
@@ -45,6 +57,11 @@ define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x49,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = shl i32 %x, %shamt
ret i32 %shl
@@ -63,6 +80,12 @@ define i32 @shl32pi(ptr %p) nounwind uwtable readnone {
; BMI264-NEXT: movl (%rdi), %eax
; BMI264-NEXT: shll $5, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT: shll $5, %eax # encoding: [0xc1,0xe0,0x05]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = shl i32 %x, 5
ret i32 %shl
@@ -91,6 +114,11 @@ define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i64 %x, %shamt
ret i64 %shl
}
@@ -109,6 +137,12 @@ define i64 @shl64i(i64 %x) nounwind uwtable readnone {
; BMI264-NEXT: movq %rdi, %rax
; BMI264-NEXT: shlq $7, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64i:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = shl i64 %x, 7
ret i64 %shl
}
@@ -137,6 +171,11 @@ define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc9,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = shl i64 %x, %shamt
ret i64 %shl
@@ -157,6 +196,12 @@ define i64 @shl64pi(ptr %p) nounwind uwtable readnone {
; BMI264-NEXT: movq (%rdi), %rax
; BMI264-NEXT: shlq $7, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64pi:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
+; EGPR-NEXT: shlq $7, %rax # encoding: [0x48,0xc1,0xe0,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = shl i64 %x, 7
ret i64 %shl
@@ -173,6 +218,11 @@ define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = lshr i32 %x, %shamt
ret i32 %shl
}
@@ -189,6 +239,11 @@ define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4b,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = lshr i32 %x, %shamt
ret i32 %shl
@@ -217,6 +272,11 @@ define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = lshr i64 %x, %shamt
ret i64 %shl
}
@@ -245,6 +305,11 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xcb,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = lshr i64 %x, %shamt
ret i64 %shl
@@ -261,6 +326,11 @@ define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %esi, %edi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = ashr i32 %x, %shamt
ret i32 %shl
}
@@ -277,6 +347,11 @@ define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %esi, (%rdi), %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x4a,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i32, ptr %p
%shl = ashr i32 %x, %shamt
ret i32 %shl
@@ -305,6 +380,11 @@ define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rsi, %rdi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shl = ashr i64 %x, %shamt
ret i64 %shl
}
@@ -333,6 +413,11 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rsi, (%rdi), %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64p:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rsi, (%rdi), %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xca,0xf7,0x07]
+; EGPR-NEXT: retq # encoding: [0xc3]
%x = load i64, ptr %p
%shl = ashr i64 %x, %shamt
ret i64 %shl
@@ -349,6 +434,11 @@ define i32 @shl32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x41,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = shl i32 %val, %shamt
ret i32 %res
@@ -374,6 +464,11 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shlxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: shl64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shlxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc1,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = shl i64 %val, %shamt
ret i64 %res
@@ -390,6 +485,11 @@ define i32 @lshr32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = lshr i32 %val, %shamt
ret i32 %res
@@ -415,6 +515,11 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: shrxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: lshr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: shrxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc3,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
@@ -431,6 +536,11 @@ define i32 @ashr32and(i32 %t, i32 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxl %edi, %esi, %eax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr32and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxl %edi, %esi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i32 %t, 31
%res = ashr i32 %val, %shamt
ret i32 %res
@@ -456,6 +566,11 @@ define i64 @ashr64and(i64 %t, i64 %val) nounwind {
; BMI264: # %bb.0:
; BMI264-NEXT: sarxq %rdi, %rsi, %rax
; BMI264-NEXT: retq
+;
+; EGPR-LABEL: ashr64and:
+; EGPR: # %bb.0:
+; EGPR-NEXT: sarxq %rdi, %rsi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc2,0xf7,0xc6]
+; EGPR-NEXT: retq # encoding: [0xc3]
%shamt = and i64 %t, 63
%res = ashr i64 %val, %shamt
ret i64 %res
>From 6eb87d58252b3c53a12966cac4be038e50f386be Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 9 Jan 2024 17:55:28 -0800
Subject: [PATCH 2/3] add parentheses to macro
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8d519f181549ff..9b25d9d7ab886a 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,7 +4087,7 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
-#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
@@ -5488,7 +5488,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
-#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
case MVT::i32:
Opc = UseMULXHi ? X86::MULX32Hrr
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
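A note on patch 2 (illustrative, not part of the patch): the macro body is a bare conditional expression, so without its own parentheses any operator applied to the macro's result binds only to the last arm of the ?:. The existing uses above happen to nest correctly inside the surrounding ?: chains, but the guard is cheap. A minimal, self-contained C++ sketch with stand-in names and values, assuming nothing beyond the precedence rules themselves:

    #include <cassert>

    // Illustrative stand-ins for the real Subtarget check and opcode enums.
    #define PICK_NO_PARENS(OPC) has_egpr ? OPC##_EVEX : OPC
    #define PICK_PARENS(OPC) (has_egpr ? OPC##_EVEX : OPC)

    enum { BZHI32rr = 10, BZHI32rr_EVEX = 20 };

    int main() {
      bool has_egpr = false;
      // Intended meaning in every line below: "(pick the opcode) + 1".
      int bad  = PICK_NO_PARENS(BZHI32rr) + 1; // expands to: has_egpr ? 20 : (10 + 1)
      int good = PICK_PARENS(BZHI32rr) + 1;    // expands to: (has_egpr ? 20 : 10) + 1
      assert(bad == 11 && good == 11);         // agree only while has_egpr is false

      has_egpr = true;
      int bad2  = PICK_NO_PARENS(BZHI32rr) + 1; // 20: the "+ 1" fell into the false arm
      int good2 = PICK_PARENS(BZHI32rr) + 1;    // 21: as intended
      assert(bad2 == 20 && good2 == 21);
      return 0;
    }
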
>From e2feb9a7d06930456430c50a01c82fa7ddc78da4 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 10 Jan 2024 22:18:21 -0800
Subject: [PATCH 3/3] use multiclass to avoid messy duplication
---
llvm/lib/Target/X86/X86InstrArithmetic.td | 23 ++-
llvm/lib/Target/X86/X86InstrCompiler.td | 117 +++------------
llvm/lib/Target/X86/X86InstrMisc.td | 61 +++-----
llvm/lib/Target/X86/X86InstrShiftRotate.td | 163 ++++-----------------
4 files changed, 75 insertions(+), 289 deletions(-)
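For readers skimming the diff below, a condensed, stand-alone sketch (not part of the patch) of the idiom patch 3 switches to: a pattern multiclass takes a string Suffix, and !cast<Instruction> resolves either the plain or the _EVEX-promoted opcode by name, so each predicate set needs only one defm instead of a duplicated block of Pat defs. The record and predicate names here mirror the ANDN patterns changed in this patch:

    // Minimal illustration of the Suffix-parameterized pattern multiclass.
    multiclass ExampleAndnPat<string Suffix = ""> {
      def : Pat<(and (not GR32:$src1), GR32:$src2),
                (!cast<Instruction>("ANDN32rr"#Suffix) GR32:$src1, GR32:$src2)>;
    }

    // Instantiated once per predicate set; "" picks ANDN32rr and
    // "_EVEX" picks ANDN32rr_EVEX.
    let Predicates = [HasBMI, NoEGPR] in
      defm : ExampleAndnPat<>;
    let Predicates = [HasBMI, HasEGPR] in
      defm : ExampleAndnPat<"_EVEX">;
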
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 289141ce6c33f5..b09c1d2b301d94 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1338,26 +1338,23 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
+multiclass Andn_patterns<string Suffix = ""> {
def : Pat<(and (not GR32:$src1), GR32:$src2),
- (ANDN32rr GR32:$src1, GR32:$src2)>;
+ (!cast<Instruction>(ANDN32rr#Suffix) GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
- (ANDN64rr GR64:$src1, GR64:$src2)>;
+ (!cast<Instruction>(ANDN64rr#Suffix) GR64:$src1, GR64:$src2)>;
def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
- (ANDN32rm GR32:$src1, addr:$src2)>;
+ (!cast<Instruction>(ANDN32rm#Suffix) GR32:$src1, addr:$src2)>;
def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
- (ANDN64rm GR64:$src1, addr:$src2)>;
+ (!cast<Instruction>(ANDN64rm#Suffix) GR64:$src1, addr:$src2)>;
+}
+
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
+ defm : Andn_patterns<>;
}
let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
- def : Pat<(and (not GR32:$src1), GR32:$src2),
- (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
- def : Pat<(and (not GR64:$src1), GR64:$src2),
- (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
- def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
- (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
- def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
- (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+ defm : Andn_patterns<"_EVEX">;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 3510bdeeff4c09..f9dc035c3fab61 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1864,120 +1864,37 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2, NoEGPR] in {
+multiclass bmi_shift_mask_patterns<SDNode op, string name, string Suffix = ""> {
let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
- (SARX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
- (SARX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHRX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHRX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHLX32rr GR32:$src1,
+ def : Pat<(op GR32:$src1, (shiftMask32 GR8:$src2)),
+ (!cast<Instruction>(name#"32rr"#Suffix) GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHLX64rr GR64:$src1,
+ def : Pat<(op GR64:$src1, (shiftMask64 GR8:$src2)),
+ (!cast<Instruction>(name#"64rr"#Suffix) GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-
- def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SARX32rm addr:$src1,
+ def : Pat<(op (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (!cast<Instruction>(name#"32rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SARX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHRX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHRX64rm addr:$src1,
+ def : Pat<(op (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (!cast<Instruction>(name#"64rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
- def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHLX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHLX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, NoEGPR] in {
+ defm : bmi_shift_mask_patterns<sra, "SARX">;
+ defm : bmi_shift_mask_patterns<srl, "SHRX">;
+ defm : bmi_shift_mask_patterns<shl, "SHLX">;
}
let Predicates = [HasBMI2, HasEGPR] in {
- let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
- (SARX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
- (SARX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHRX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHRX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
- (SHLX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
- (SHLX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- }
-
- def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SARX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SARX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHRX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHRX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
- (SHLX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
- (SHLX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ defm : bmi_shift_mask_patterns<sra, "SARX", "_EVEX">;
+ defm : bmi_shift_mask_patterns<srl, "SHRX", "_EVEX">;
+ defm : bmi_shift_mask_patterns<shl, "SHLX", "_EVEX">;
}
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index a51720d738f47e..d4b67f37b0c6b4 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,75 +1241,48 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI, NoEGPR] in {
+multiclass Bls_patterns<string Suffix = ""> {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
// to always match if `src` has more users.
def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
+ (!cast<Instruction>(BLSR32rr#Suffix) GR32:$src)>;
def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
+ (!cast<Instruction>(BLSR64rr#Suffix) GR64:$src)>;
def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
+ (!cast<Instruction>(BLSMSK32rr#Suffix) GR32:$src)>;
def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
+ (!cast<Instruction>(BLSMSK64rr#Suffix) GR64:$src)>;
def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr GR32:$src)>;
+ (!cast<Instruction>(BLSI32rr#Suffix) GR32:$src)>;
def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr GR64:$src)>;
+ (!cast<Instruction>(BLSI64rr#Suffix) GR64:$src)>;
// Versions to match flag producing ops.
def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
+ (!cast<Instruction>(BLSR32rr#Suffix) GR32:$src)>;
def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
+ (!cast<Instruction>(BLSR64rr#Suffix) GR64:$src)>;
def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
+ (!cast<Instruction>(BLSMSK32rr#Suffix) GR32:$src)>;
def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
+ (!cast<Instruction>(BLSMSK64rr#Suffix) GR64:$src)>;
def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr GR32:$src)>;
+ (!cast<Instruction>(BLSI32rr#Suffix) GR32:$src)>;
def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr GR64:$src)>;
+ (!cast<Instruction>(BLSI64rr#Suffix) GR64:$src)>;
}
-let Predicates = [HasBMI, HasEGPR] in {
- def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr_EVEX GR32:$src)>;
- def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr_EVEX GR64:$src)>;
-
- def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr_EVEX GR32:$src)>;
- def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr_EVEX GR64:$src)>;
-
- def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr_EVEX GR32:$src)>;
- def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr_EVEX GR64:$src)>;
-
- // Versions to match flag producing ops.
- def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSR32rr_EVEX GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSR64rr_EVEX GR64:$src)>;
+let Predicates = [HasBMI, NoEGPR] in
+ defm : Bls_patterns<>;
- def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
- (BLSMSK32rr_EVEX GR32:$src)>;
- def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
- (BLSMSK64rr_EVEX GR64:$src)>;
-
- def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
- (BLSI32rr_EVEX GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
- (BLSI64rr_EVEX GR64:$src)>;
-}
+let Predicates = [HasBMI, HasEGPR] in
+ defm : Bls_patterns<"_EVEX">;
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index e225fe6950e3da..9d58c908208f5c 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,30 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2, NoEGPR] in {
- // Prefer RORX which is non-destructive and doesn't update EFLAGS.
- let AddedComplexity = 10 in {
- def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
- (RORX32ri GR32:$src, imm:$shamt)>;
- def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
- (RORX64ri GR64:$src, imm:$shamt)>;
-
- def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
- def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
- }
-
- def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi addr:$src, imm:$shamt)>;
- def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi addr:$src, imm:$shamt)>;
-
- def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
- def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
-
+multiclass bmi_shift_patterns<SDNode op, string name, string Suffix = ""> {
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
// immediate shift, i.e. the following code is considered better
//
@@ -917,34 +894,15 @@ let Predicates = [HasBMI2, NoEGPR] in {
// ... %edi, ...
//
let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, GR8:$src2),
- (SARX32rr GR32:$src1,
+ def : Pat<(op GR32:$src1, GR8:$src2),
+ (!cast<Instruction>(name#"32rr"#Suffix) GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, GR8:$src2),
- (SARX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, GR8:$src2),
- (SHRX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, GR8:$src2),
- (SHRX64rr GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, GR8:$src2),
- (SHLX32rr GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, GR8:$src2),
- (SHLX64rr GR64:$src1,
+ def : Pat<(op GR64:$src1, GR8:$src2),
+ (!cast<Instruction>(name#"64rr"#Suffix) GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-
// We prefer to use
// mov (%ecx), %esi
// shl $imm, $esi
@@ -955,112 +913,53 @@ let Predicates = [HasBMI2, NoEGPR] in {
// shlx %al, (%ecx), %esi
//
// This priority is enforced by IsProfitableToFoldLoad.
- def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
- (SARX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
- (SARX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
- (SHRX32rm addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
- (SHRX64rm addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
- (SHLX32rm addr:$src1,
+ def : Pat<(op (loadi32 addr:$src1), GR8:$src2),
+ (!cast<Instruction>(name#"32rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
- (SHLX64rm addr:$src1,
+ def : Pat<(op (loadi64 addr:$src1), GR8:$src2),
+ (!cast<Instruction>(name#"64rm"#Suffix) addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-let Predicates = [HasBMI2, HasEGPR] in {
+multiclass RORX_patterns<string Suffix = ""> {
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
- (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX32ri#Suffix) GR32:$src, imm:$shamt)>;
def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
- (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX64ri#Suffix) GR64:$src, imm:$shamt)>;
def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX32ri#Suffix) GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX64ri#Suffix) GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
}
def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX32mi#Suffix) addr:$src, imm:$shamt)>;
def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+ (!cast<Instruction>(RORX64mi#Suffix) addr:$src, imm:$shamt)>;
def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
- (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ (!cast<Instruction>(RORX32mi#Suffix) addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
- (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
-
- let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, GR8:$src2),
- (SARX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, GR8:$src2),
- (SARX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(srl GR32:$src1, GR8:$src2),
- (SHRX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, GR8:$src2),
- (SHRX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-
- def : Pat<(shl GR32:$src1, GR8:$src2),
- (SHLX32rr_EVEX GR32:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, GR8:$src2),
- (SHLX64rr_EVEX GR64:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- }
-
- def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
- (SARX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
- (SARX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ (!cast<Instruction>(RORX64mi#Suffix) addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+}
- def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
- (SHRX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
- (SHRX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, NoEGPR] in {
+ defm : RORX_patterns<>;
+ defm : bmi_shift_patterns<sra, "SARX">;
+ defm : bmi_shift_patterns<srl, "SHRX">;
+ defm : bmi_shift_patterns<shl, "SHLX">;
+}
- def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
- (SHLX32rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
- (SHLX64rm_EVEX addr:$src1,
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+let Predicates = [HasBMI2, HasEGPR] in {
+ defm : RORX_patterns<"_EVEX">;
+ defm : bmi_shift_patterns<sra, "SARX", "_EVEX">;
+ defm : bmi_shift_patterns<srl, "SHRX", "_EVEX">;
+ defm : bmi_shift_patterns<shl, "SHLX", "_EVEX">;
}
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),