[llvm] 212cba0 - [X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch (#122051)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 11 00:51:24 PST 2025


Author: Michael Clark
Date: 2025-01-11T16:51:20+08:00
New Revision: 212cba0ef37dd3b2a253c063240370de42fc67c1

URL: https://github.com/llvm/llvm-project/commit/212cba0ef37dd3b2a253c063240370de42fc67c1
DIFF: https://github.com/llvm/llvm-project/commit/212cba0ef37dd3b2a253c063240370de42fc67c1.diff

LOG: [X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch (#122051)

During differential fuzzing, I found 8 more instructions whose disp8
offset multipliers differ from those used by binutils. I am fairly sure
this is a bug in the X86 LLVM disp8 offset multipliers for this subset of
vector scatter and gather prefetch instructions: the QPS forms access
dword elements but were scaled by 8, and the DPD forms access qword
elements but were scaled by 4. Please check, and refer to the previous
pull request: https://github.com/llvm/llvm-project/pull/120340

These vector scatter and gather prefetch instructions also have an
unusual k mask operand position, but I have not addressed that in this
patch because I am unsure how to change the Intel format in the tablegen
file.

```
hex:	62 f2 fd 49 c6 4c 51 01
llvm:	vgatherpf0dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vgatherpf0dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vgatherpf0dpd 	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 4c 51 01
llvm:	vgatherpf0qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vgatherpf0qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vgatherpf0qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 54 51 01
llvm:	vgatherpf1dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vgatherpf1dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vgatherpf1dpd	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 54 51 01
llvm:	vgatherpf1qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vgatherpf1qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vgatherpf1qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 6c 51 01
llvm:	vscatterpf0dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vscatterpf0dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vscatterpf0dpd	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 6c 51 01
llvm:	vscatterpf0qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vscatterpf0qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vscatterpf0qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 74 51 01
llvm:	vscatterpf1dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vscatterpf1dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vscatterpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 74 51 01
llvm:	vscatterpf1qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vscatterpf1qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vscatterpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
```

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/test/MC/X86/avx512pf-64-att.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d6ca4b142afe0a..abf016000fc8eb 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10388,10 +10388,10 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
-                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
-                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
 
 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10400,10 +10400,10 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
-                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
-                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
 
 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10412,10 +10412,10 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
-                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
-                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
 
 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10424,10 +10424,10 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
-                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
-                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
 
 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

diff  --git a/llvm/test/MC/X86/avx512pf-64-att.s b/llvm/test/MC/X86/avx512pf-64-att.s
index bae7fb0f235cc6..ef2d30ee21a20f 100644
--- a/llvm/test/MC/X86/avx512pf-64-att.s
+++ b/llvm/test/MC/X86/avx512pf-64-att.s
@@ -63,3 +63,67 @@ vscatterpf0qpd (%r14,%zmm14){%k7}
 // CHECK: vscatterpf1qpd (%r15,%zmm13) {%k1}
 // CHECK: encoding: [0x62,0x92,0xfd,0x49,0xc7,0x34,0x2f]
 vscatterpf1qpd (%r15,%zmm13){%k1}
+
+// CHECK: vgatherpf0dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x4c,0x51,0x01]
+vgatherpf0dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vgatherpf0dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x4c,0x51,0x01]
+vgatherpf0dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf0qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x4c,0x51,0x01]
+vgatherpf0qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf0qps	4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x4c,0x51,0x01]
+vgatherpf0qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x54,0x51,0x01]
+vgatherpf1dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vgatherpf1dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x54,0x51,0x01]
+vgatherpf1dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x54,0x51,0x01]
+vgatherpf1qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x54,0x51,0x01]
+vgatherpf1qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x6c,0x51,0x01]
+vscatterpf0dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vscatterpf0dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x6c,0x51,0x01]
+vscatterpf0dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x6c,0x51,0x01]
+vscatterpf0qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0qps	4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x6c,0x51,0x01]
+vscatterpf0qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x74,0x51,0x01]
+vscatterpf1dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vscatterpf1dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x74,0x51,0x01]
+vscatterpf1dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x74,0x51,0x01]
+vscatterpf1qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x74,0x51,0x01]
+vscatterpf1qps 4(%rcx,%zmm2,2){%k1}


        


More information about the llvm-commits mailing list