[llvm] [AMDGPU] Ensure all WMMA instructions are marked as convergent (PR #178314)

Tue Jan 27 14:54:17 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

<details>
<summary>Changes</summary>

This is an extension of https://github.com/llvm/llvm-project/pull/165602. It marks every instruction defined through the `WMMAInst` multiclass as convergent, which is needed to fix an issue where V_WMMA_F32_16X16X16_F16_twoaddr_w32 was incorrectly sunk by machine-sink.

---
Full diff: https://github.com/llvm/llvm-project/pull/178314.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+1-1) 
- (added) llvm/test/CodeGen/AMDGPU/wmma-convergent.mir (+48) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ca7dfa734e94d..d303281ce0b8b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1396,7 +1396,7 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
   defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
 
   defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
-  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+  let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
     let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
       def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir b/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir
new file mode 100644
index 0000000000000..76baaf4b7c422
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wmma-convergent.mir
@@ -0,0 +1,48 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic    -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN:   not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN:   not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic   -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN:   not grep "Number of machine instructions sunk"
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic   -run-pass=machine-sink -mattr=+wavefrontsize64 %s -stats 2>&1 | \
+# RUN:   not grep "Number of machine instructions sunk"
+# machine-sink must not sink WMMA* instructions.
+# Ensure that WMMA instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
+
+---
+name:            wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vsrc:vreg_256 = IMPLICIT_DEF
+    %ssrc:sreg_64 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec    
+    %sdst:sreg_64 = SI_IF %ssrc:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vsrc:vreg_128 = IMPLICIT_DEF
+    %vsrc2:vreg_256 = IMPLICIT_DEF    
+    %ssrc:sreg_64 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+    %sdst:sreg_64 = SI_IF %ssrc:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/178314