[llvm] [AMDGPU] Ensure all WMMA instructions are marked as convergent (PR #178314)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 27 14:54:17 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (LU-JOHN)
<details>
<summary>Changes</summary>
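This PR extends https://github.com/llvm/llvm-project/pull/165602 by setting `isConvergent = 1` in the `WMMAInst` multiclass, so that the WMMA instructions it defines are marked as convergent. This fixes an issue where V_WMMA_F32_16X16X16_F16_twoaddr_w32 was incorrectly sunk by the machine-sink pass.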
---
Full diff: https://github.com/llvm/llvm-project/pull/178314.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+1-1)
- (added) llvm/test/CodeGen/AMDGPU/wmma-convergent.mir (+48)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ca7dfa734e94d..d303281ce0b8b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1396,7 +1396,7 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir b/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir
new file mode 100644
index 0000000000000..76baaf4b7c422
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir
@@ -0,0 +1,48 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN: not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN: not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN: not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN: not grep "Number of machine instructions sunk"
+# machine-sink must not sink WMMA* instructions.
+# Ensure that WMMA instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
+
+---
+name: wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vsrc:vreg_256 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vsrc:vreg_128 = IMPLICIT_DEF
+ %vsrc2:vreg_256 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/178314
More information about the llvm-commits mailing list