[llvm] [X86][MC] Fix wrong encoding of promoted BMI instructions due to missing NoCD8 (PR #78386)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 17 20:10:05 PST 2024


https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/78386

>From 9bc8ee2ed3a5b594c02c1951abe14d65933080d4 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 16 Jan 2024 19:31:29 -0800
Subject: [PATCH 1/5] [X86][MC] Fix wrong encoding due to missing NoCD8

---
 llvm/lib/Target/X86/X86InstrSSE.td         |  4 ++--
 llvm/lib/Target/X86/X86InstrUtils.td       |  6 +++---
 llvm/test/MC/Disassembler/X86/apx/bmi2.txt | 20 ++++++++++----------
 llvm/test/MC/X86/apx/bmi2-att.s            | 20 ++++++++++----------
 llvm/test/MC/X86/apx/bmi2-intel.s          | 20 ++++++++++----------
 5 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a8cd1996eeb356b..7d94fec9a354d04 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6663,14 +6663,14 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
 class Crc32r<X86TypeInfo t, RegisterClass rc, SDPatternOperator node>
   : ITy<0xF1, MRMSrcReg, t, (outs rc:$dst), (ins rc:$src1, t.RegClass:$src2),
       "crc32", binop_args, [(set rc:$dst, (node rc:$src1, t.RegClass:$src2))]>,
-    Sched<[WriteCRC32]>, NoCD8 {
+    Sched<[WriteCRC32]> {
   let Constraints = "$src1 = $dst";
 }
 
 class Crc32m<X86TypeInfo t, RegisterClass rc, SDPatternOperator node>
   : ITy<0xF1, MRMSrcMem, t, (outs rc:$dst), (ins rc:$src1, t.MemOperand:$src2),
       "crc32", binop_args, [(set rc:$dst, (node rc:$src1, (load addr:$src2)))]>,
-    Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>, NoCD8 {
+    Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]> {
   let Constraints = "$src1 = $dst";
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index e79240ae443ade5..27aeff1cd3ae2c0 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -113,9 +113,9 @@ class NDD<bit ndd, Map map = OB> {
   Map OpMap = !if(!eq(ndd, 0), map, T_MAP4);
 }
 // NF - Helper for NF (no flags update) instructions
-class NF: T_MAP4, EVEX, EVEX_NF, NoCD8;
+class NF: T_MAP4, EVEX, EVEX_NF;
 // PL - Helper for promoted legacy instructions
-class PL: T_MAP4, EVEX, NoCD8, ExplicitEVEXPrefix;
+class PL: T_MAP4, EVEX, ExplicitEVEXPrefix;
 
 //===----------------------------------------------------------------------===//
 // X86 Type infomation definitions
@@ -961,7 +961,7 @@ class ITy<bits<8> o, Format f, X86TypeInfo t, dag outs, dag ins, string m,
           string args, list<dag> p>
   : I<{o{7}, o{6}, o{5}, o{4}, o{3}, o{2}, o{1},
        !if(!eq(t.HasEvenOpcode, 1), 0, o{0})}, f, outs, ins,
-      !strconcat(m, "{", t.InstrSuffix, "}\t", args), p> {
+      !strconcat(m, "{", t.InstrSuffix, "}\t", args), p>, NoCD8 {
   let hasSideEffects = 0;
   let hasREX_W  = t.HasREX_W;
 }
diff --git a/llvm/test/MC/Disassembler/X86/apx/bmi2.txt b/llvm/test/MC/Disassembler/X86/apx/bmi2.txt
index 0fb11f4061f1b93..428cf2d148e2dd1 100644
--- a/llvm/test/MC/Disassembler/X86/apx/bmi2.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/bmi2.txt
@@ -13,11 +13,11 @@
 
 # ATT:   mulxl	123(%rax,%rbx,4), %ecx, %edx
 # INTEL: mulx	edx, ecx, dword ptr [rax + 4*rbx + 123]
-0x62,0xf2,0x77,0x08,0xf6,0x94,0x98,0x7b,0x00,0x00,0x00
+0x62,0xf2,0x77,0x08,0xf6,0x54,0x98,0x7b
 
 # ATT:   mulxq	123(%rax,%rbx,4), %r9, %r15
 # INTEL: mulx	r15, r9, qword ptr [rax + 4*rbx + 123]
-0x62,0x72,0xb7,0x08,0xf6,0xbc,0x98,0x7b,0x00,0x00,0x00
+0x62,0x72,0xb7,0x08,0xf6,0x7c,0x98,0x7b
 
 # ATT:   mulxl	%r18d, %r22d, %r26d
 # INTEL: mulx	r26d, r22d, r18d
@@ -115,11 +115,11 @@
 
 # ATT:   rorxl	$123, 123(%rax,%rbx,4), %ecx
 # INTEL: rorx	ecx, dword ptr [rax + 4*rbx + 123], 123
-0x62,0xf3,0x7f,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b
+0x62,0xf3,0x7f,0x08,0xf0,0x4c,0x98,0x7b,0x7b
 
 # ATT:   rorxq	$123, 123(%rax,%rbx,4), %r9
 # INTEL: rorx	r9, qword ptr [rax + 4*rbx + 123], 123
-0x62,0x73,0xff,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b
+0x62,0x73,0xff,0x08,0xf0,0x4c,0x98,0x7b,0x7b
 
 # ATT:   rorxl	$123, %r18d, %r22d
 # INTEL: rorx	r22d, r18d, 123
@@ -145,7 +145,7 @@
 
 # ATT:   sarxl	%ecx, 123(%rax,%rbx,4), %edx
 # INTEL: sarx	edx, dword ptr [rax + 4*rbx + 123], ecx
-0x62,0xf2,0x76,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00
+0x62,0xf2,0x76,0x08,0xf7,0x54,0x98,0x7b
 
 # ATT:   sarxq	%r9, %r15, %r11
 # INTEL: sarx	r11, r15, r9
@@ -153,7 +153,7 @@
 
 # ATT:   sarxq	%r9, 123(%rax,%rbx,4), %r15
 # INTEL: sarx	r15, qword ptr [rax + 4*rbx + 123], r9
-0x62,0x72,0xb6,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00
+0x62,0x72,0xb6,0x08,0xf7,0x7c,0x98,0x7b
 
 # ATT:   sarxl	%r18d, %r22d, %r26d
 # INTEL: sarx	r26d, r22d, r18d
@@ -179,7 +179,7 @@
 
 # ATT:   shlxl	%ecx, 123(%rax,%rbx,4), %edx
 # INTEL: shlx	edx, dword ptr [rax + 4*rbx + 123], ecx
-0x62,0xf2,0x75,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00
+0x62,0xf2,0x75,0x08,0xf7,0x54,0x98,0x7b
 
 # ATT:   shlxq	%r9, %r15, %r11
 # INTEL: shlx	r11, r15, r9
@@ -187,7 +187,7 @@
 
 # ATT:   shlxq	%r9, 123(%rax,%rbx,4), %r15
 # INTEL: shlx	r15, qword ptr [rax + 4*rbx + 123], r9
-0x62,0x72,0xb5,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00
+0x62,0x72,0xb5,0x08,0xf7,0x7c,0x98,0x7b
 
 # ATT:   shlxl	%r18d, %r22d, %r26d
 # INTEL: shlx	r26d, r22d, r18d
@@ -213,7 +213,7 @@
 
 # ATT:   shrxl	%ecx, 123(%rax,%rbx,4), %edx
 # INTEL: shrx	edx, dword ptr [rax + 4*rbx + 123], ecx
-0x62,0xf2,0x77,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00
+0x62,0xf2,0x77,0x08,0xf7,0x54,0x98,0x7b
 
 # ATT:   shrxq	%r9, %r15, %r11
 # INTEL: shrx	r11, r15, r9
@@ -221,7 +221,7 @@
 
 # ATT:   shrxq	%r9, 123(%rax,%rbx,4), %r15
 # INTEL: shrx	r15, qword ptr [rax + 4*rbx + 123], r9
-0x62,0x72,0xb7,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00
+0x62,0x72,0xb7,0x08,0xf7,0x7c,0x98,0x7b
 
 # ATT:   shrxl	%r18d, %r22d, %r26d
 # INTEL: shrx	r26d, r22d, r18d
diff --git a/llvm/test/MC/X86/apx/bmi2-att.s b/llvm/test/MC/X86/apx/bmi2-att.s
index 14e8566e799d59b..20544d7922cfee7 100644
--- a/llvm/test/MC/X86/apx/bmi2-att.s
+++ b/llvm/test/MC/X86/apx/bmi2-att.s
@@ -15,11 +15,11 @@
          {evex}	mulxq	%r9, %r15, %r11
 
 # CHECK: {evex}	mulxl	123(%rax,%rbx,4), %ecx, %edx
-# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf6,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf6,0x54,0x98,0x7b]
          {evex}	mulxl	123(%rax,%rbx,4), %ecx, %edx
 
 # CHECK: {evex}	mulxq	123(%rax,%rbx,4), %r9, %r15
-# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf6,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf6,0x7c,0x98,0x7b]
          {evex}	mulxq	123(%rax,%rbx,4), %r9, %r15
 
 # CHECK: mulxl	%r18d, %r22d, %r26d
@@ -117,11 +117,11 @@
          {evex}	rorxq	$123, %r9, %r15
 
 # CHECK: {evex}	rorxl	$123, 123(%rax,%rbx,4), %ecx
-# CHECK: encoding: [0x62,0xf3,0x7f,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b]
+# CHECK: encoding: [0x62,0xf3,0x7f,0x08,0xf0,0x4c,0x98,0x7b,0x7b]
          {evex}	rorxl	$123, 123(%rax,%rbx,4), %ecx
 
 # CHECK: {evex}	rorxq	$123, 123(%rax,%rbx,4), %r9
-# CHECK: encoding: [0x62,0x73,0xff,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b]
+# CHECK: encoding: [0x62,0x73,0xff,0x08,0xf0,0x4c,0x98,0x7b,0x7b]
          {evex}	rorxq	$123, 123(%rax,%rbx,4), %r9
 
 # CHECK: rorxl	$123, %r18d, %r22d
@@ -147,7 +147,7 @@
          {evex}	sarxl	%ecx, %edx, %r10d
 
 # CHECK: {evex}	sarxl	%ecx, 123(%rax,%rbx,4), %edx
-# CHECK: encoding: [0x62,0xf2,0x76,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x76,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	sarxl	%ecx, 123(%rax,%rbx,4), %edx
 
 # CHECK: {evex}	sarxq	%r9, %r15, %r11
@@ -155,7 +155,7 @@
          {evex}	sarxq	%r9, %r15, %r11
 
 # CHECK: {evex}	sarxq	%r9, 123(%rax,%rbx,4), %r15
-# CHECK: encoding: [0x62,0x72,0xb6,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb6,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	sarxq	%r9, 123(%rax,%rbx,4), %r15
 
 # CHECK: sarxl	%r18d, %r22d, %r26d
@@ -181,7 +181,7 @@
          {evex}	shlxl	%ecx, %edx, %r10d
 
 # CHECK: {evex}	shlxl	%ecx, 123(%rax,%rbx,4), %edx
-# CHECK: encoding: [0x62,0xf2,0x75,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x75,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	shlxl	%ecx, 123(%rax,%rbx,4), %edx
 
 # CHECK: {evex}	shlxq	%r9, %r15, %r11
@@ -189,7 +189,7 @@
          {evex}	shlxq	%r9, %r15, %r11
 
 # CHECK: {evex}	shlxq	%r9, 123(%rax,%rbx,4), %r15
-# CHECK: encoding: [0x62,0x72,0xb5,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb5,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	shlxq	%r9, 123(%rax,%rbx,4), %r15
 
 # CHECK: shlxl	%r18d, %r22d, %r26d
@@ -215,7 +215,7 @@
          {evex}	shrxl	%ecx, %edx, %r10d
 
 # CHECK: {evex}	shrxl	%ecx, 123(%rax,%rbx,4), %edx
-# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	shrxl	%ecx, 123(%rax,%rbx,4), %edx
 
 # CHECK: {evex}	shrxq	%r9, %r15, %r11
@@ -223,7 +223,7 @@
          {evex}	shrxq	%r9, %r15, %r11
 
 # CHECK: {evex}	shrxq	%r9, 123(%rax,%rbx,4), %r15
-# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	shrxq	%r9, 123(%rax,%rbx,4), %r15
 
 # CHECK: shrxl	%r18d, %r22d, %r26d
diff --git a/llvm/test/MC/X86/apx/bmi2-intel.s b/llvm/test/MC/X86/apx/bmi2-intel.s
index f21004fdd696abf..fe96fbc0be8d7a5 100644
--- a/llvm/test/MC/X86/apx/bmi2-intel.s
+++ b/llvm/test/MC/X86/apx/bmi2-intel.s
@@ -11,11 +11,11 @@
          {evex}	mulx	r11, r15, r9
 
 # CHECK: {evex}	mulx	edx, ecx, dword ptr [rax + 4*rbx + 123]
-# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf6,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf6,0x54,0x98,0x7b]
          {evex}	mulx	edx, ecx, dword ptr [rax + 4*rbx + 123]
 
 # CHECK: {evex}	mulx	r15, r9, qword ptr [rax + 4*rbx + 123]
-# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf6,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf6,0x7c,0x98,0x7b]
          {evex}	mulx	r15, r9, qword ptr [rax + 4*rbx + 123]
 
 # CHECK: mulx	r26d, r22d, r18d
@@ -113,11 +113,11 @@
          {evex}	rorx	r15, r9, 123
 
 # CHECK: {evex}	rorx	ecx, dword ptr [rax + 4*rbx + 123], 123
-# CHECK: encoding: [0x62,0xf3,0x7f,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b]
+# CHECK: encoding: [0x62,0xf3,0x7f,0x08,0xf0,0x4c,0x98,0x7b,0x7b]
          {evex}	rorx	ecx, dword ptr [rax + 4*rbx + 123], 123
 
 # CHECK: {evex}	rorx	r9, qword ptr [rax + 4*rbx + 123], 123
-# CHECK: encoding: [0x62,0x73,0xff,0x08,0xf0,0x8c,0x98,0x7b,0x00,0x00,0x00,0x7b]
+# CHECK: encoding: [0x62,0x73,0xff,0x08,0xf0,0x4c,0x98,0x7b,0x7b]
          {evex}	rorx	r9, qword ptr [rax + 4*rbx + 123], 123
 
 # CHECK: rorx	r22d, r18d, 123
@@ -143,7 +143,7 @@
          {evex}	sarx	r10d, edx, ecx
 
 # CHECK: {evex}	sarx	edx, dword ptr [rax + 4*rbx + 123], ecx
-# CHECK: encoding: [0x62,0xf2,0x76,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x76,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	sarx	edx, dword ptr [rax + 4*rbx + 123], ecx
 
 # CHECK: {evex}	sarx	r11, r15, r9
@@ -151,7 +151,7 @@
          {evex}	sarx	r11, r15, r9
 
 # CHECK: {evex}	sarx	r15, qword ptr [rax + 4*rbx + 123], r9
-# CHECK: encoding: [0x62,0x72,0xb6,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb6,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	sarx	r15, qword ptr [rax + 4*rbx + 123], r9
 
 # CHECK: sarx	r26d, r22d, r18d
@@ -177,7 +177,7 @@
          {evex}	shlx	r10d, edx, ecx
 
 # CHECK: {evex}	shlx	edx, dword ptr [rax + 4*rbx + 123], ecx
-# CHECK: encoding: [0x62,0xf2,0x75,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x75,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	shlx	edx, dword ptr [rax + 4*rbx + 123], ecx
 
 # CHECK: {evex}	shlx	r11, r15, r9
@@ -185,7 +185,7 @@
          {evex}	shlx	r11, r15, r9
 
 # CHECK: {evex}	shlx	r15, qword ptr [rax + 4*rbx + 123], r9
-# CHECK: encoding: [0x62,0x72,0xb5,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb5,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	shlx	r15, qword ptr [rax + 4*rbx + 123], r9
 
 # CHECK: shlx	r26d, r22d, r18d
@@ -211,7 +211,7 @@
          {evex}	shrx	r10d, edx, ecx
 
 # CHECK: {evex}	shrx	edx, dword ptr [rax + 4*rbx + 123], ecx
-# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf7,0x94,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0xf2,0x77,0x08,0xf7,0x54,0x98,0x7b]
          {evex}	shrx	edx, dword ptr [rax + 4*rbx + 123], ecx
 
 # CHECK: {evex}	shrx	r11, r15, r9
@@ -219,7 +219,7 @@
          {evex}	shrx	r11, r15, r9
 
 # CHECK: {evex}	shrx	r15, qword ptr [rax + 4*rbx + 123], r9
-# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf7,0xbc,0x98,0x7b,0x00,0x00,0x00]
+# CHECK: encoding: [0x62,0x72,0xb7,0x08,0xf7,0x7c,0x98,0x7b]
          {evex}	shrx	r15, qword ptr [rax + 4*rbx + 123], r9
 
 # CHECK: shrx	r26d, r22d, r18d

>From 199c97b8248fa93525ad969c9b96cca6a121c565 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 16 Jan 2024 23:14:34 -0800
Subject: [PATCH 2/5] rewrite bmi_pdep_pext

---
 llvm/lib/Target/X86/X86InstrMisc.td | 38 +++++++++++++++--------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 80cddc570b8427e..aaa877d0d329855 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1375,38 +1375,40 @@ let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
                              (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
 }
 
-multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
+multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t,
                          X86MemOperand x86memop, SDPatternOperator OpNode,
-                         PatFrag ld_frag, string Suffix = ""> {
-  def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
-                  NoCD8, VVVV, Sched<[WriteALU]>;
-  def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
-                    !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
-                  NoCD8, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+                         PatFrag ld_frag, string suffix = ""> {
+  def rr#suffix : ITy<0xF5, MRMSrcReg, t, (outs t.RegClass:$dst),
+                      (ins t.RegClass:$src1, t.RegClass:$src2),
+                      mnemonic, binop_ndd_args,
+                      [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, t.RegClass:$src2))]>,
+                  VVVV, Sched<[WriteALU]>;
+  def rm#suffix : ITy<0xF5, MRMSrcMem, t, (outs t.RegClass:$dst),
+                      (ins t.RegClass:$src1, x86memop:$src2),
+                      mnemonic, binop_ndd_args,
+                      [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, (ld_frag addr:$src2)))]>,
+                  VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
 }
 
 let Predicates = [HasBMI2, NoEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, i32mem,
                                X86pdep, loadi32>, T8, XD, VEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, i64mem,
                                X86pdep, loadi64>, T8, XD, REX_W, VEX;
-  defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, i32mem,
                                X86pext, loadi32>, T8, XS, VEX;
-  defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, i64mem,
                                X86pext, loadi64>, T8, XS, REX_W, VEX;
 }
 
 let Predicates = [HasBMI2, HasEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, i32mem,
                                X86pdep, loadi32, "_EVEX">, T8, XD, EVEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, i64mem,
                                X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX;
-  defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, i32mem,
                                X86pext, loadi32, "_EVEX">, T8, XS, EVEX;
-  defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, i64mem,
                                X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX;
 }
 

>From 4b5432b606110bd1ca65869f59aeae4efa7caae7 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 17 Jan 2024 00:02:14 -0800
Subject: [PATCH 3/5] remove redundant argument

---
 llvm/lib/Target/X86/X86InstrMisc.td | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index aaa877d0d329855..c0305ee7dc2e8ac 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1375,8 +1375,7 @@ let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
                              (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
 }
 
-multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t,
-                         X86MemOperand x86memop, SDPatternOperator OpNode,
+multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t, SDPatternOperator OpNode,
                          PatFrag ld_frag, string suffix = ""> {
   def rr#suffix : ITy<0xF5, MRMSrcReg, t, (outs t.RegClass:$dst),
                       (ins t.RegClass:$src1, t.RegClass:$src2),
@@ -1384,32 +1383,24 @@ multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t,
                       [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, t.RegClass:$src2))]>,
                   VVVV, Sched<[WriteALU]>;
   def rm#suffix : ITy<0xF5, MRMSrcMem, t, (outs t.RegClass:$dst),
-                      (ins t.RegClass:$src1, x86memop:$src2),
+                      (ins t.RegClass:$src1, t.MemOperand:$src2),
                       mnemonic, binop_ndd_args,
                       [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, (ld_frag addr:$src2)))]>,
                   VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
 }
 
 let Predicates = [HasBMI2, NoEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, i32mem,
-                               X86pdep, loadi32>, T8, XD, VEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, i64mem,
-                               X86pdep, loadi64>, T8, XD, REX_W, VEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, i32mem,
-                               X86pext, loadi32>, T8, XS, VEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, i64mem,
-                               X86pext, loadi64>, T8, XS, REX_W, VEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, loadi32>, T8, XD, VEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, loadi64>, T8, XD, REX_W, VEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, loadi32>, T8, XS, VEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, loadi64>, T8, XS, REX_W, VEX;
 }
 
 let Predicates = [HasBMI2, HasEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, i32mem,
-                               X86pdep, loadi32, "_EVEX">, T8, XD, EVEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, i64mem,
-                               X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, i32mem,
-                               X86pext, loadi32, "_EVEX">, T8, XS, EVEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, i64mem,
-                               X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, loadi32, "_EVEX">, T8, XD, EVEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, loadi32, "_EVEX">, T8, XS, EVEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX;
 }
 
 //===----------------------------------------------------------------------===//

>From db6b5765d2cb7207131b845bb992676638295570 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 17 Jan 2024 17:36:32 -0800
Subject: [PATCH 4/5] remove ld_frag

---
 llvm/lib/Target/X86/X86InstrMisc.td | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index c0305ee7dc2e8ac..7394ee49c1c53b8 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1376,7 +1376,7 @@ let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
 }
 
 multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t, SDPatternOperator OpNode,
-                         PatFrag ld_frag, string suffix = ""> {
+                         string suffix = ""> {
   def rr#suffix : ITy<0xF5, MRMSrcReg, t, (outs t.RegClass:$dst),
                       (ins t.RegClass:$src1, t.RegClass:$src2),
                       mnemonic, binop_ndd_args,
@@ -1385,22 +1385,22 @@ multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t, SDPatternOperator OpNod
   def rm#suffix : ITy<0xF5, MRMSrcMem, t, (outs t.RegClass:$dst),
                       (ins t.RegClass:$src1, t.MemOperand:$src2),
                       mnemonic, binop_ndd_args,
-                      [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, (ld_frag addr:$src2)))]>,
+                      [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, (t.LoadNode addr:$src2)))]>,
                   VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
 }
 
 let Predicates = [HasBMI2, NoEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, loadi32>, T8, XD, VEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, loadi64>, T8, XD, REX_W, VEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, loadi32>, T8, XS, VEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, loadi64>, T8, XS, REX_W, VEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep>, T8, XD, VEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep>, T8, XD, REX_W, VEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext>, T8, XS, VEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext>, T8, XS, REX_W, VEX;
 }
 
 let Predicates = [HasBMI2, HasEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, loadi32, "_EVEX">, T8, XD, EVEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, loadi64, "_EVEX">, T8, XD, REX_W, EVEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, loadi32, "_EVEX">, T8, XS, EVEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, loadi64, "_EVEX">, T8, XS, REX_W, EVEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, "_EVEX">, T8, XD, EVEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, "_EVEX">, T8, XD, REX_W, EVEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, "_EVEX">, T8, XS, EVEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, "_EVEX">, T8, XS, REX_W, EVEX;
 }
 
 //===----------------------------------------------------------------------===//

>From ecf8857abd9bc36e6b654b9e30be8d14037bfbc1 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Wed, 17 Jan 2024 20:09:45 -0800
Subject: [PATCH 5/5] extract T8 to class

---
 llvm/lib/Target/X86/X86InstrMisc.td | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 7394ee49c1c53b8..d0b049ff9d697a9 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1381,26 +1381,26 @@ multiclass bmi_pdep_pext<string mnemonic, X86TypeInfo t, SDPatternOperator OpNod
                       (ins t.RegClass:$src1, t.RegClass:$src2),
                       mnemonic, binop_ndd_args,
                       [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, t.RegClass:$src2))]>,
-                  VVVV, Sched<[WriteALU]>;
+                  T8, VVVV, Sched<[WriteALU]>;
   def rm#suffix : ITy<0xF5, MRMSrcMem, t, (outs t.RegClass:$dst),
                       (ins t.RegClass:$src1, t.MemOperand:$src2),
                       mnemonic, binop_ndd_args,
                       [(set t.RegClass:$dst, (OpNode t.RegClass:$src1, (t.LoadNode addr:$src2)))]>,
-                  VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+                  T8, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
 }
 
 let Predicates = [HasBMI2, NoEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep>, T8, XD, VEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep>, T8, XD, REX_W, VEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext>, T8, XS, VEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext>, T8, XS, REX_W, VEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep>, XD, VEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep>, XD, REX_W, VEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext>, XS, VEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext>, XS, REX_W, VEX;
 }
 
 let Predicates = [HasBMI2, HasEGPR] in {
-  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, "_EVEX">, T8, XD, EVEX;
-  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, "_EVEX">, T8, XD, REX_W, EVEX;
-  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, "_EVEX">, T8, XS, EVEX;
-  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, "_EVEX">, T8, XS, REX_W, EVEX;
+  defm PDEP32 : bmi_pdep_pext<"pdep", Xi32, X86pdep, "_EVEX">, XD, EVEX;
+  defm PDEP64 : bmi_pdep_pext<"pdep", Xi64, X86pdep, "_EVEX">, XD, REX_W, EVEX;
+  defm PEXT32 : bmi_pdep_pext<"pext", Xi32, X86pext, "_EVEX">, XS, EVEX;
+  defm PEXT64 : bmi_pdep_pext<"pext", Xi64, X86pext, "_EVEX">, XS, REX_W, EVEX;
 }
 
 //===----------------------------------------------------------------------===//



More information about the llvm-commits mailing list