[llvm] [AMDGPU] Fix VGPR lowering for V_DUAL_FMAMK_F32 (PR #170567)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 14:11:33 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
Fixes: https://github.com/llvm/llvm-project/issues/170552
---
Full diff: https://github.com/llvm/llvm-project/pull/170567.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+11-2)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir (+6-2)
- (modified) llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir (+20)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 90f0b49ab9a78..bcfdb2ca5a3da 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3450,6 +3450,12 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
static const AMDGPU::OpName VOP2MADMKOps[4] = {
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
+ static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
+ AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
+ AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
+ static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
+ AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
+ AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
unsigned TSFlags = Desc.TSFlags;
@@ -3491,8 +3497,11 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
if (TSFlags & SIInstrFlags::VIMAGE)
return {VIMGOps, nullptr};
- if (AMDGPU::isVOPD(Desc.getOpcode()))
- return {VOPDOpsX, VOPDOpsY};
+ if (AMDGPU::isVOPD(Desc.getOpcode())) {
+ auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
+ return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
+ (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
+ }
assert(!(TSFlags & SIInstrFlags::MIMG));
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index e8c27f2eb3685..21f5515b7fb91 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -283,11 +283,11 @@ body: |
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x544
+ ; GCN-NEXT: s_set_vgpr_msb 0x554
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x4410
+ ; GCN-NEXT: s_set_vgpr_msb 0x5410
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
@@ -303,6 +303,10 @@ body: |
; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
$vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
+ ; GCN-NEXT: s_set_vgpr_msb 0xae54
+ ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/
+ $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec
+
; ASM: NumVgprs: 777
...
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
index b05edd046b874..05bbb0f54ef9e 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
@@ -4481,3 +4481,23 @@ body: |
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
...
+
+---
+name: vopd_no_combine_fmamk_src1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; SCHED-LABEL: name: vopd_no_combine_fmamk_src1
+ ; SCHED: $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+ ; SCHED-NEXT: $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+ ;
+ ; PAIR-LABEL: name: vopd_no_combine_fmamk_src1
+ ; PAIR: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ;
+ ; LOWER-LABEL: name: vopd_no_combine_fmamk_src1
+ ; LOWER: S_SET_VGPR_MSB 5, implicit-def $mode
+ ; LOWER-NEXT: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; LOWER-NEXT: S_SET_VGPR_MSB 1280, implicit-def $mode
+ $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+ $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/170567
More information about the llvm-commits
mailing list