[llvm] [X86][MC]Support Enc/Dec for EGPR for Promoted BMI instructions (PR #73899)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 22:09:09 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: None (XinWang10)

<details>
<summary>Changes</summary>

R16-R31 was added into GPRs in https://github.com/llvm/llvm-project/pull/70958,
This patch supports the encoding/decoding for promoted BMI instructions in EVEX space.


RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

---

Patch is 65.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73899.diff


48 Files Affected:

- (modified) llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (+3-3) 
- (modified) llvm/lib/Target/X86/X86InstrArithmetic.td (+42-16) 
- (modified) llvm/lib/Target/X86/X86InstrInfo.td (+3) 
- (modified) llvm/lib/Target/X86/X86InstrMisc.td (+83-69) 
- (modified) llvm/lib/Target/X86/X86InstrShiftRotate.td (+37-22) 
- (added) llvm/test/MC/Disassembler/X86/apx/andn.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/bextr.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/blsi.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/blsmsk.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/blsr.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/bzhi.txt (+18) 
- (modified) llvm/test/MC/Disassembler/X86/apx/evex-format.txt (+12) 
- (added) llvm/test/MC/Disassembler/X86/apx/mulx.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/pdep.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/pext.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/rorx.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/sarx.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/shlx.txt (+18) 
- (added) llvm/test/MC/Disassembler/X86/apx/shrx.txt (+18) 
- (added) llvm/test/MC/X86/apx/andn-att.s (+20) 
- (added) llvm/test/MC/X86/apx/andn-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/bextr-att.s (+20) 
- (added) llvm/test/MC/X86/apx/bextr-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/blsi-att.s (+20) 
- (added) llvm/test/MC/X86/apx/blsi-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/blsmsk-att.s (+20) 
- (added) llvm/test/MC/X86/apx/blsmsk-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/blsr-att.s (+20) 
- (added) llvm/test/MC/X86/apx/blsr-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/bzhi-att.s (+20) 
- (added) llvm/test/MC/X86/apx/bzhi-intel.s (+17) 
- (modified) llvm/test/MC/X86/apx/evex-format-att.s (+12) 
- (modified) llvm/test/MC/X86/apx/evex-format-intel.s (+12) 
- (added) llvm/test/MC/X86/apx/mulx-att.s (+20) 
- (added) llvm/test/MC/X86/apx/mulx-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/pdep-att.s (+20) 
- (added) llvm/test/MC/X86/apx/pdep-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/pext-att.s (+20) 
- (added) llvm/test/MC/X86/apx/pext-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/rorx-att.s (+20) 
- (added) llvm/test/MC/X86/apx/rorx-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/sarx-att.s (+20) 
- (added) llvm/test/MC/X86/apx/sarx-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/shlx-att.s (+20) 
- (added) llvm/test/MC/X86/apx/shlx-intel.s (+17) 
- (added) llvm/test/MC/X86/apx/shrx-att.s (+20) 
- (added) llvm/test/MC/X86/apx/shrx-intel.s (+17) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+26) 


``````````diff
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 1f130c22298ed47..b6ebbcf56aef73d 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1115,10 +1115,10 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
   case X86II::MRMSrcMem4VOp3: {
     // Instruction format for 4VOp3:
     //   src1(ModR/M), MemAddr, src3(VEX_4V)
-    Prefix.setR(MI, CurOp++);
+    Prefix.setRR2(MI, CurOp++);
     Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
     Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
-    Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
+    Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands);
     break;
   }
   case X86II::MRMSrcMemOp4: {
@@ -1189,7 +1189,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
     //   src1(ModR/M), src2(ModR/M), src3(VEX_4V)
     Prefix.setRR2(MI, CurOp++);
     Prefix.setBB2(MI, CurOp++);
-    Prefix.set4V(MI, CurOp++);
+    Prefix.set4VV2(MI, CurOp++);
     break;
   }
   case X86II::MRMSrcRegOp4: {
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 48188da291ded0a..56cbc13eaaec8d6 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1289,21 +1289,34 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
 //
 multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
                     PatFrag ld_frag, X86FoldableSchedWrite sched> {
+let Predicates = [HasBMI, NoEGPR] in {
   def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-            !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-            [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
-            Sched<[sched]>;
+             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
+           VEX_4V, Sched<[sched]>;
   def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
-            !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-            [(set RC:$dst, EFLAGS,
-             (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
-           Sched<[sched.Folded, sched.ReadAfterFold]>;
+             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set RC:$dst, EFLAGS,
+              (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
+           VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
+let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
+  def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                  !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
+                EVEX_4V, Sched<[sched]>;
+  def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                  !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set RC:$dst, EFLAGS,
+                   (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
+                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
 }
 
 // Complexity is reduced to give and with immediate a chance to match first.
-let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
-  defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
-  defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W;
+let Defs = [EFLAGS], AddedComplexity = -6 in {
+  defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS;
+  defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W;
 }
 
 let Predicates = [HasBMI], AddedComplexity = -6 in {
@@ -1323,6 +1336,7 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
 multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
                     X86FoldableSchedWrite sched> {
 let hasSideEffects = 0 in {
+let Predicates = [HasBMI2, NoEGPR] in {
   def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
              []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
@@ -1346,15 +1360,27 @@ let hasSideEffects = 0 in {
   def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
                     []>, Sched<[sched.Folded]>;
 }
+let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
+  def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
+                   !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+                   []>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>;
+let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
+  def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
+                   !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+                   []>, T8XD, EVEX_4V,
+                 Sched<[WriteIMulHLd, sched.Folded,
+                        // Memory operand.
+                        ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                        // Implicit read of EDX/RDX
+                        sched.ReadAfterFold]>;
 }
-
-let Predicates = [HasBMI2] in {
-  let Uses = [EDX] in
-    defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
-  let Uses = [RDX] in
-    defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
 }
 
+let Uses = [EDX] in
+  defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
+let Uses = [RDX] in
+  defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
+
 //===----------------------------------------------------------------------===//
 // ADCX and ADOX Instructions
 //
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 9046b6af463acf9..988e2a6707c0009 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -878,6 +878,9 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
 // X86 Instruction Predicate Definitions.
 def TruePredicate : Predicate<"true">;
 
+def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR  : Predicate<"!Subtarget->hasEGPR()">;
+
 def HasCMOV      : Predicate<"Subtarget->canUseCMOV()">;
 def NoCMOV       : Predicate<"!Subtarget->canUseCMOV()">;
 
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 32aa82fc93ca302..764d4bd6da2a1df 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1214,19 +1214,19 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
 
 multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
                   RegisterClass RC, X86MemOperand x86memop,
-                  X86FoldableSchedWrite sched> {
+                  X86FoldableSchedWrite sched, string Suffix = ""> {
 let hasSideEffects = 0 in {
-  def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
-             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
-             T8PS, VEX_4V, Sched<[sched]>;
+  def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+                    !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+                  T8PS, VEX_4V, Sched<[sched]>;
   let mayLoad = 1 in
-  def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
-             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
-             T8PS, VEX_4V, Sched<[sched.Folded]>;
+  def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+                    !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+                  T8PS, VEX_4V, Sched<[sched.Folded]>;
 }
 }
 
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
+let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
   defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
   defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
   defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
@@ -1235,6 +1235,15 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
   defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
 }
 
+let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
+  defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+  defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+  defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+  defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+  defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX;
+  defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX;
+}
+
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate BMI instructions.
 //===----------------------------------------------------------------------===//
@@ -1292,56 +1301,50 @@ let Predicates = [HasBMI] in {
             (BLSI64rr GR64:$src)>;
 }
 
-multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
-                     X86MemOperand x86memop, SDNode OpNode,
-                     PatFrag ld_frag, X86FoldableSchedWrite Sched> {
-  def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
-             T8PS, VEX, Sched<[Sched]>;
-  def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
-              (implicit EFLAGS)]>, T8PS, VEX,
-             Sched<[Sched.Folded,
-                    // x86memop:$src1
-                    ReadDefault, ReadDefault, ReadDefault, ReadDefault,
-                    ReadDefault,
-                    // RC:$src2
-                    Sched.ReadAfterFold]>;
+multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC,
+                         X86MemOperand x86memop, SDPatternOperator OpNode,
+                         PatFrag ld_frag, X86FoldableSchedWrite Sched,
+                         string Suffix = ""> {
+  def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+                  T8PS, VEX, Sched<[Sched]>;
+let mayLoad = 1 in
+  def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
+                     (implicit EFLAGS)]>, T8PS, VEX,
+                  Sched<[Sched.Folded,
+                         // x86memop:$src1
+                         ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                         ReadDefault,
+                         // RC:$src2
+                         Sched.ReadAfterFold]>;
 }
 
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
-  defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
-                           X86bextr, loadi32, WriteBEXTR>;
-  defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
-                           X86bextr, loadi64, WriteBEXTR>, REX_W;
-}
-
-multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
-                    X86MemOperand x86memop, SDNode Int,
-                    PatFrag ld_frag, X86FoldableSchedWrite Sched> {
-  def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
-             T8PS, VEX, Sched<[Sched]>;
-  def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
-              (implicit EFLAGS)]>, T8PS, VEX,
-             Sched<[Sched.Folded,
-                    // x86memop:$src1
-                    ReadDefault, ReadDefault, ReadDefault, ReadDefault,
-                    ReadDefault,
-                    // RC:$src2
-                    Sched.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2], Defs = [EFLAGS] in {
-  defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
-                         X86bzhi, loadi32, WriteBZHI>;
-  defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
-                         X86bzhi, loadi64, WriteBZHI>, REX_W;
+let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in {
+  defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
+                               X86bextr, loadi32, WriteBEXTR>;
+  defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
+                               X86bextr, loadi64, WriteBEXTR>, REX_W;
+}
+let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in {
+  defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
+                              X86bzhi, loadi32, WriteBZHI>;
+  defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
+                              X86bzhi, loadi64, WriteBZHI>, REX_W;
+}
+let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in {
+  defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem,
+                               X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX;
+  defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem,
+                               X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W;
+}
+let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in {
+  defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem,
+                              X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX;
+  defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem,
+                              X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W;
 }
 
 def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -1383,19 +1386,19 @@ let Predicates = [HasBMI2, NoTBM] in {
 }
 
 multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
-                         X86MemOperand x86memop, SDNode OpNode,
-                         PatFrag ld_frag> {
-  def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
-             VEX_4V, Sched<[WriteALU]>;
-  def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
-             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
-             VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2] in {
+                         X86MemOperand x86memop, SDPatternOperator OpNode,
+                         PatFrag ld_frag, string Suffix = ""> {
+  def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
+                  VEX_4V, Sched<[WriteALU]>;
+  def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
+                  VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+}
+
+let Predicates = [HasBMI2, NoEGPR] in {
   defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
                                X86pdep, loadi32>, T8XD;
   defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
@@ -1406,6 +1409,17 @@ let Predicates = [HasBMI2] in {
                                X86pext, loadi64>, T8XS, REX_W;
 }
 
+let Predicates = [HasBMI2, HasEGPR] in {
+  defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+                               X86pdep, loadi32, "_EVEX">, T8XD, EVEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+                               X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX;
+  defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+                               X86pext, loadi32, "_EVEX">, T8XS, EVEX;
+  defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+                               X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX;
+}
+
 //===----------------------------------------------------------------------===//
 // Lightweight Profiling Instructions
 
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index e416e4495e22778..48bf23f8cbf7b2f 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -824,38 +824,40 @@ def ROT64L2R_imm8  : SDNodeXForm<imm, [{
 
 // NOTE: We use WriteShift for these rotates as they avoid the stalls
 // of many of the older x86 rotate instructions.
-multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
+multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop,
+                      string Suffix = ""> {
 let hasSideEffects = 0 in {
-  def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
-               !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []>, TAXD, VEX, Sched<[WriteShift]>;
+  def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
+                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+                  TAXD, VEX, Sched<[WriteShift]>;
   let mayLoad = 1 in
-  def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
-               (ins x86memop:$src1, u8imm:$src2),
-               !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []>, TAXD, VEX, Sched<[WriteShiftLd]>;
+  def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
+                      (ins x86memop:$src1, u8imm:$src2),
+                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+                  TAXD, VEX, Sched<[WriteShiftLd]>;
 }
 }
 
-multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
+multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop,
+                     string Suffix = ""> {
 let hasSideEffects = 0 in {
-  def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
-             VEX, Sched<[WriteShift]>;
+  def rr#Suffix : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+                    VEX, Sched<[WriteShift]>;
   let mayLoad = 1 in
-  def rm : I<0xF7, MRMSrcMem4VOp3,
-             (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
-             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
-             VEX, Sched<[WriteShift.Folded,
-                         // x86memop:$src1
-                         ReadDefault, ReadDefault, ReadDefault, ReadDefault,
-                         ReadDefault,
-                         // RC:$src2
-                         WriteShift.ReadAfterFold]>;
+  def rm#Suffix : I<0xF7, MRMSrcMem4VOp3,
+                    (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+                  VEX, Sched<[WriteShift.Folded,
+                              // x86memop:$src1
+                              ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                              ReadDefault,
+  ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/73899


More information about the llvm-commits mailing list