[llvm] [AMDGPU] Fix wrong MSB encoding for V_FMAMK instructions (PR #168107)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 14 14:15:16 PST 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/168107
>From caf99afc1cb7abe07bd13de6bbf8ddf8ad60dca1 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 14 Nov 2025 14:17:55 -0500
Subject: [PATCH 1/5] [AMDGPU] Fix wrong MSB encoding for V_FMAMK instructions
These instructions use `src0`, `imm`, `src1` as operands.
Fixes SWDEV-566579.
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 22 +++++++++++++---
.../CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir | 26 +++++++++++++++----
2 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 37bf2d2463ae2..771fde294a35e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3439,17 +3439,31 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
AMDGPU::OpName::vdstY};
+ // VOP2 MADMK instructions use src0, imm, src1 scheme.
+ static const AMDGPU::OpName VOP2MADMKOps[4] = {
+ AMDGPU::OpName::src0, AMDGPU::OpName::imm, AMDGPU::OpName::src1,
+ AMDGPU::OpName::vdst};
+
unsigned TSFlags = Desc.TSFlags;
if (TSFlags &
(SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
+ switch (Desc.getOpcode()) {
// LD_SCALE operands ignore MSB.
- if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 ||
- Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 ||
- Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 ||
- Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250)
+ case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
+ case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
+ case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
+ case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
return {};
+ case AMDGPU::V_FMAMK_F32:
+ case AMDGPU::V_FMAMK_F32_gfx12:
+ case AMDGPU::V_FMAMK_F64:
+ case AMDGPU::V_FMAMK_F64_gfx1250:
+ return {VOP2MADMKOps, nullptr};
+ default:
+ break;
+ }
return {VOPOps, nullptr};
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 7e1c28f8e7bbb..47b8232ae8582 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -332,23 +332,39 @@ body: |
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x4445
+ ; GCN-NEXT: s_set_vgpr_msb 0x4451
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x4505
+ ; GCN-NEXT: s_set_vgpr_msb 0x5111
; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x541
+ ; GCN-NEXT: s_set_vgpr_msb 0x1141
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x4144
+ ; GCN-NEXT: s_set_vgpr_msb 0x4150
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode
- ; ASM: NumVgprs: 259
+ ; GCN-NEXT: s_set_vgpr_msb 0x5051
+ ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
+ $vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x5101
+ ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
+ $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x110
+ ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
+ $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x1040
+ ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
+ $vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode
+
+ ; ASM: NumVgprs: 358
...
>From 15a98b12a82878abd19eccfae22fb8bbe5f2f3aa Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 14 Nov 2025 15:04:34 -0500
Subject: [PATCH 2/5] use `AMDGPU::OpName::NUM_OPERAND_NAMES` to replace
`AMDGPU::OpName::imm`
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 771fde294a35e..59b1be7671d0f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3441,8 +3441,8 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
// VOP2 MADMK instructions use src0, imm, src1 scheme.
static const AMDGPU::OpName VOP2MADMKOps[4] = {
- AMDGPU::OpName::src0, AMDGPU::OpName::imm, AMDGPU::OpName::src1,
- AMDGPU::OpName::vdst};
+ AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
+ AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
unsigned TSFlags = Desc.TSFlags;
>From 08bb907c4a60b343e00d8e64a523311dd03167a9 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 14 Nov 2025 15:19:39 -0500
Subject: [PATCH 3/5] add t16
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 3 ++
.../AMDGPU/vgpr-lowering-gfx1250-t16.mir | 29 +++++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 59b1be7671d0f..26f5edf49c234 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3456,6 +3456,9 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
return {};
+ case AMDGPU::V_FMAMK_F16:
+ case AMDGPU::V_FMAMK_F16_t16:
+ case AMDGPU::V_FMAMK_F16_t16_gfx12:
case AMDGPU::V_FMAMK_F32:
case AMDGPU::V_FMAMK_F32_gfx12:
case AMDGPU::V_FMAMK_F64:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 32cc398740d62..61bc890a38eee 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -64,3 +64,32 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
$vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
...
+
+# ASM-LABEL: {{^}}fmaak_fmamk:
+# DIS-LABEL: <fmaak_fmamk>:
+---
+name: fmaak_fmamk
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; ASM: %bb.0:
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x5051
+ ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
+ $vgpr260_lo16 = V_FMAMK_F16_t16 undef $vgpr356_lo16, 1, undef $vgpr258_lo16, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x5101
+ ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
+ $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr356_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x110
+ ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
+ $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr356_lo16, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x1040
+ ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
+ $vgpr256_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
+
+ ; ASM: NumVgprs: 358
+
+...
>From 8d5f2162457bb9465a8fb84e8a9d44b4cb42e12a Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 14 Nov 2025 16:56:44 -0500
Subject: [PATCH 4/5] add t16 test
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 ++
.../AMDGPU/vgpr-lowering-gfx1250-t16.mir | 23 +++++++++----------
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 26f5edf49c234..aff4cfe1dc70e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3459,6 +3459,8 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
case AMDGPU::V_FMAMK_F16:
case AMDGPU::V_FMAMK_F16_t16:
case AMDGPU::V_FMAMK_F16_t16_gfx12:
+ case AMDGPU::V_FMAMK_F16_fake16:
+ case AMDGPU::V_FMAMK_F16_fake16_gfx12:
case AMDGPU::V_FMAMK_F32:
case AMDGPU::V_FMAMK_F32_gfx12:
case AMDGPU::V_FMAMK_F64:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 61bc890a38eee..b6d1ecc697e9c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -74,21 +74,20 @@ body: |
bb.0:
; ASM: %bb.0:
- ; GCN-NEXT: s_set_vgpr_msb 0x5051
- ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
- $vgpr260_lo16 = V_FMAMK_F16_t16 undef $vgpr356_lo16, 1, undef $vgpr258_lo16, implicit $exec, implicit $mode
+ ; GCN-NEXT: s_set_vgpr_msb 0
+ S_SET_VGPR_MSB 0, implicit $exec, implicit-def $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x5101
- ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
- $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr356_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode
+ ; GCN-NEXT: v_fmamk_f16 v26.l, v56.l, 0x1, v58.l
+ $vgpr26_lo16 = V_FMAMK_F16_t16 undef $vgpr56_lo16, 1, undef $vgpr58_lo16, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x110
- ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
- $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr356_lo16, implicit $exec, implicit $mode
+ ; GCN-NEXT: v_fmamk_f16 v0.l, v35.l, 0x1, v2.l
+ $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr35_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x1040
- ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
- $vgpr256_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
+ ; GCN-NEXT: v_fmamk_f16 v0.l, v2.l, 0x1, v6.l
+ $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr6_lo16, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: v_fmamk_f16 v5.l, v2.l, 0x1, v4.l
+ $vgpr5_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
; ASM: NumVgprs: 358
>From e645aa1eaa7e80b779e83be11bfeb3f002a840c4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 14 Nov 2025 17:14:02 -0500
Subject: [PATCH 5/5] more tests
---
.../CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir | 10 +++++++---
llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir | 13 +++++++++++++
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index b6d1ecc697e9c..d524aad01a902 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -74,9 +74,13 @@ body: |
bb.0:
; ASM: %bb.0:
- ; GCN-NEXT: s_set_vgpr_msb 0
- S_SET_VGPR_MSB 0, implicit $exec, implicit-def $mode
+ ; We use an extra instruction to set the MSB, and then we expect it to be reset to 0 (lower 16-bit).
+ ; GCN: s_set_vgpr_msb 0xcf
+ ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
+ $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: s_set_vgpr_msb 0xcf00
; GCN-NEXT: v_fmamk_f16 v26.l, v56.l, 0x1, v58.l
$vgpr26_lo16 = V_FMAMK_F16_t16 undef $vgpr56_lo16, 1, undef $vgpr58_lo16, implicit $exec, implicit $mode
@@ -89,6 +93,6 @@ body: |
; GCN-NEXT: v_fmamk_f16 v5.l, v2.l, 0x1, v4.l
$vgpr5_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
- ; ASM: NumVgprs: 358
+ ; ASM: NumVgprs: 771
...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 47b8232ae8582..e8c27f2eb3685 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -364,6 +364,19 @@ body: |
; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
$vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode
+ ; GCN-NEXT: s_set_vgpr_msb 0x4000
+ ; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58
+ $vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: v_fmamk_f16 v0, v35, 0x1, v2
+ $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr35, 1, undef $vgpr2, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: v_fmamk_f16 v0, v2, 0x1, v6
+ $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr6, implicit $exec, implicit $mode
+
+ ; GCN-NEXT: v_fmamk_f16 v5, v2, 0x1, v4
+ $vgpr5 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr4, implicit $exec, implicit $mode
+
; ASM: NumVgprs: 358
...
More information about the llvm-commits
mailing list