[llvm] [AMDGPU] Fix SDWA commuting (PR #106920)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 1 12:12:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Yaxun (Sam) Liu (yxsamliu)
<details>
<summary>Changes</summary>
SDWA insts miss reverse opcode, which causes them to be treated as commutable with default reverse opcode i.e. their own opcode. As a result, SWDA F16 sub A, B and Sub B, A are merged by machine CSE. The correct behavior is to merged sub A, B and subrev B, A instead of sub B, A. This issues caused failures in rocFFT tests.
Another issue is that src0_sel and src1_sel are not swapped when SDWA insts are commuted.
Verified that this fixes rocFFT tests failure.
---
Full diff: https://github.com/llvm/llvm-project/pull/106920.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+7-4)
- (added) llvm/test/CodeGen/AMDGPU/sdwa-cse.mir (+56)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a857bdba53c3e8..43ec2b4484ec28 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2812,6 +2812,10 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
Src1, AMDGPU::OpName::src1_modifiers);
+ if (isSDWA(MI))
+ swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_sel, Src1,
+ AMDGPU::OpName::src1_sel);
+
CommutedMI->setDesc(get(CommutedOpcode));
}
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index fccaa27f361381..852d434cf743e8 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -174,10 +174,12 @@ multiclass VOP2Inst_e64<string opName,
multiclass VOP2Inst_sdwa<string opName,
VOPProfile P,
+ string revOp = opName,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
if P.HasExtSDWA then
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
} // End renamedInGFX9 = GFX9Renamed
}
@@ -188,7 +190,7 @@ multiclass VOP2Inst<string opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
- VOP2Inst_sdwa<opName, P, GFX9Renamed> {
+ VOP2Inst_sdwa<opName, P, revOp, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -237,7 +239,7 @@ multiclass VOP2Inst_VOPD<string opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
- VOP2Inst_sdwa<opName, P, GFX9Renamed> {
+ VOP2Inst_sdwa<opName, P, revOp, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -259,7 +261,8 @@ multiclass VOP2bInst <string opName,
}
if P.HasExtSDWA then
- def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
if P.HasExtDPP then
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-cse.mir b/llvm/test/CodeGen/AMDGPU/sdwa-cse.mir
new file mode 100644
index 00000000000000..1c12812cbcf527
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-cse.mir
@@ -0,0 +1,56 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=machine-cse -verify-machineinstrs %s -o - 2>&1 | FileCheck --check-prefix=GCN %s
+
+# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_no_merge
+# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+# GCN: %3:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+# GCN: %6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
+# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
+---
+name: test_machine_cse_subtraction_sdwa_f16_no_merge
+body: |
+ bb.0:
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+ %3:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+ %4:vgpr_32 = IMPLICIT_DEF
+ %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+ %6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
+...
+
+# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_merge_same_src_sel
+# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %5, 0, 1, 0, implicit $exec
+---
+name: test_machine_cse_subtraction_sdwa_f16_merge_same_src_sel
+body: |
+ bb.0:
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+ %3:vgpr_32 = contract nofpexcept V_SUBREV_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
+ %4:vgpr_32 = IMPLICIT_DEF
+ %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+ %6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
+...
+
+# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_merge_diff_src_sel
+# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 6, 5, implicit $mode, implicit $exec
+# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %5, 0, 1, 0, implicit $exec
+---
+name: test_machine_cse_subtraction_sdwa_f16_merge_diff_src_sel
+body: |
+ bb.0:
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 6, 5, implicit $mode, implicit $exec
+ %3:vgpr_32 = contract nofpexcept V_SUBREV_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 6, implicit $mode, implicit $exec
+ %4:vgpr_32 = IMPLICIT_DEF
+ %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
+ %6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
``````````
</details>
https://github.com/llvm/llvm-project/pull/106920
More information about the llvm-commits
mailing list