[llvm] [X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch (PR #122051)
Michael Clark via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 22:10:33 PST 2025
https://github.com/michaeljclark created https://github.com/llvm/llvm-project/pull/122051
During differential fuzzing, I found 8 more instructions whose disp8 offset multipliers differ from binutils. I am fairly sure there is a bug in the X86 LLVM disp8 offset multipliers for this subset of vector scatter and gather prefetch instructions. Please check, and refer to the previous pull request: https://github.com/llvm/llvm-project/pull/120340
These vector scatter and gather prefetch instructions also have an unusual k-mask operand position, but I have not addressed that in this patch because I am unsure how to change the Intel format in the TableGen file.
```
hex: 62 f2 fd 49 c6 4c 51 01
llvm: vgatherpf0dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vgatherpf0dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vgatherpf0dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}
hex: 62 f2 7d 49 c7 4c 51 01
llvm: vgatherpf0qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vgatherpf0qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vgatherpf0qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
hex: 62 f2 fd 49 c6 54 51 01
llvm: vgatherpf1dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vgatherpf1dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vgatherpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}
hex: 62 f2 7d 49 c7 54 51 01
llvm: vgatherpf1qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vgatherpf1qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vgatherpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
hex: 62 f2 fd 49 c6 6c 51 01
llvm: vscatterpf0dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vscatterpf0dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vscatterpf0dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}
hex: 62 f2 7d 49 c7 6c 51 01
llvm: vscatterpf0qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vscatterpf0qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vscatterpf0qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
hex: 62 f2 fd 49 c6 74 51 01
llvm: vscatterpf1dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vscatterpf1dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vscatterpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}
hex: 62 f2 7d 49 c7 74 51 01
llvm: vscatterpf1qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vscatterpf1qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vscatterpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
```
From 41ff83259220ececd0c757232bce16636f864036 Mon Sep 17 00:00:00 2001
From: Michael Clark <michaeljclark at mac.com>
Date: Wed, 8 Jan 2025 18:33:46 +1300
Subject: [PATCH] [X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch
The cdisp8 encoding must match the element size (32-bit single precision or 64-bit double precision).
---
llvm/lib/Target/X86/X86InstrAVX512.td | 16 +++----
llvm/test/MC/X86/avx512pf-64-att.s | 64 +++++++++++++++++++++++++++
2 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d6ca4b142afe0a..abf016000fc8eb 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10388,10 +10388,10 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
- VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
- VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10400,10 +10400,10 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
- VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
- VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10412,10 +10412,10 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
- VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
- VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10424,10 +10424,10 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
- VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
- VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
diff --git a/llvm/test/MC/X86/avx512pf-64-att.s b/llvm/test/MC/X86/avx512pf-64-att.s
index bae7fb0f235cc6..ef2d30ee21a20f 100644
--- a/llvm/test/MC/X86/avx512pf-64-att.s
+++ b/llvm/test/MC/X86/avx512pf-64-att.s
@@ -63,3 +63,67 @@ vscatterpf0qpd (%r14,%zmm14){%k7}
// CHECK: vscatterpf1qpd (%r15,%zmm13) {%k1}
// CHECK: encoding: [0x62,0x92,0xfd,0x49,0xc7,0x34,0x2f]
vscatterpf1qpd (%r15,%zmm13){%k1}
+
+// CHECK: vgatherpf0dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x4c,0x51,0x01]
+vgatherpf0dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vgatherpf0dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x4c,0x51,0x01]
+vgatherpf0dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf0qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x4c,0x51,0x01]
+vgatherpf0qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf0qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x4c,0x51,0x01]
+vgatherpf0qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x54,0x51,0x01]
+vgatherpf1dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vgatherpf1dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x54,0x51,0x01]
+vgatherpf1dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x54,0x51,0x01]
+vgatherpf1qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vgatherpf1qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x54,0x51,0x01]
+vgatherpf1qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x6c,0x51,0x01]
+vscatterpf0dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vscatterpf0dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x6c,0x51,0x01]
+vscatterpf0dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x6c,0x51,0x01]
+vscatterpf0qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf0qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x6c,0x51,0x01]
+vscatterpf0qps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1dpd 8(%rcx,%ymm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x74,0x51,0x01]
+vscatterpf1dpd 8(%rcx,%ymm2,2){%k1}
+
+// CHECK: vscatterpf1dps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x74,0x51,0x01]
+vscatterpf1dps 4(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1qpd 8(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x74,0x51,0x01]
+vscatterpf1qpd 8(%rcx,%zmm2,2){%k1}
+
+// CHECK: vscatterpf1qps 4(%rcx,%zmm2,2) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x74,0x51,0x01]
+vscatterpf1qps 4(%rcx,%zmm2,2){%k1}
More information about the llvm-commits mailing list