[llvm] ac00a11 - [AMDGPU] Ensure v_mfma_scale_f32_{16x16x128|32x32x64}_f8f6f4 instructions are convergent (#178627)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 06:46:42 PST 2026
Author: Frederik Harwath
Date: 2026-01-29T15:46:37+01:00
New Revision: ac00a1142f8af4cfbe8feaa2471290af66fed2c9
URL: https://github.com/llvm/llvm-project/commit/ac00a1142f8af4cfbe8feaa2471290af66fed2c9
DIFF: https://github.com/llvm/llvm-project/commit/ac00a1142f8af4cfbe8feaa2471290af66fed2c9.diff
LOG: [AMDGPU] Ensure v_mfma_scale_f32_{16x16x128|32x32x64}_f8f6f4 instructions are convergent (#178627)
The scaled variants of the MFMA instructions are not properly marked as
"convergent", and hence the machine-sink pass sinks them, which is
incorrect.
This patch ensures that these instructions are marked as "convergent". The
new test also covers other MFMA variants, but only the scaled variants
are sunk incorrectly without the changes from this patch.
Added:
llvm/test/CodeGen/AMDGPU/mfma-convergent.mir
Modified:
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 9fb28ef97ec21..405e5dddba639 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -996,6 +996,7 @@ class MAIInst<string OpName, VOPProfile P, SDPatternOperator node, bit Scaled =
Instruction Opcode = !cast<Instruction>(NAME);
bit is_dgemm = 0;
bit is_gfx940_xdl = 0;
+ let isConvergent = 1;
let PseudoInstr = NAME; // FIXME: Why is this not the default
}
@@ -1033,7 +1034,7 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
defvar ProfileVGPR = !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD");
- let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
+ let mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
def _e64 : MAIInst<OpName, ProfileAGPR,
@@ -1060,7 +1061,7 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
}
}
- } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
+ } // mayRaiseFPException = 0, ReadsModeReg = 1
}
// Provide a wrapper around MAIInst that provides the appended operands from V_MFMA_LD_SCALE_B32
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-convergent.mir b/llvm/test/CodeGen/AMDGPU/mfma-convergent.mir
new file mode 100644
index 0000000000000..8121f4281dbfa
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mfma-convergent.mir
@@ -0,0 +1,478 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-sink -o - %s | FileCheck %s
+# machine-sink must not sink MFMA instructions.
+# Ensure that MFMA instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
+---
+name: test_V_MFMA_F32_32X32X64_F8F6F4_f4_f4_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_32X32X64_F8F6F4_f4_f4_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:areg_512_align2 = nofpexcept V_MFMA_F32_32X32X64_F8F6F4_f4_f4_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:av_128_align2 = IMPLICIT_DEF
+ %vsrc1:av_128_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ %vdst:areg_512_align2 = nofpexcept V_MFMA_F32_32X32X64_F8F6F4_f4_f4_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_16X16X128_F8F6F4_f4_f4_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_16X16X128_F8F6F4_f4_f4_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:areg_128_align2 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f4_f4_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:av_128_align2 = IMPLICIT_DEF
+ %vsrc1:av_128_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ %vdst:areg_128_align2 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f4_f4_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %scale_vsrc0:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %scale_vsrc2:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:vreg_128_align2 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %vsrc0, %vsrc1, %vsrc2, 4, 4, %scale_vsrc0, %scale_vsrc2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:av_128_align2 = IMPLICIT_DEF
+ %vsrc1:av_128_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc2:vreg_128_align2 = IMPLICIT_DEF
+ %scale_vsrc0:vgpr_32 = IMPLICIT_DEF
+ %scale_vsrc2:vgpr_32 = IMPLICIT_DEF
+ %vdst:vreg_128_align2 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %vsrc0, %vsrc1, %vsrc2, 4, 4, %scale_vsrc0, %scale_vsrc2, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_SCALE_F32_32X32X64_F8F6F4_f4_f4_mac_vgprcd_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_SCALE_F32_32X32X64_F8F6F4_f4_f4_mac_vgprcd_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:vreg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %scale_vsrc0:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %scale_vsrc2:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:vreg_512_align2 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f4_f4_mac_vgprcd_e64 %vsrc0, %vsrc1, %vsrc2, 4, 4, %scale_vsrc0, %scale_vsrc2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:av_128_align2 = IMPLICIT_DEF
+ %vsrc1:av_128_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc2:vreg_512_align2 = IMPLICIT_DEF
+ %scale_vsrc0:vgpr_32 = IMPLICIT_DEF
+ %scale_vsrc2:vgpr_32 = IMPLICIT_DEF
+ %vdst:vreg_512_align2 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f4_f4_mac_vgprcd_e64 %vsrc0, %vsrc1, %vsrc2, 4, 4, %scale_vsrc0, %scale_vsrc2, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_4X4X1F32_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_4X4X1F32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vgpr_32 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ %vdst:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_32X32X1F32_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_32X32X1F32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_1024_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:areg_1024_align2 = V_MFMA_F32_32X32X1F32_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vgpr_32 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %vsrc2:areg_1024_align2 = IMPLICIT_DEF
+ %vdst:areg_1024_align2 = V_MFMA_F32_32X32X1F32_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_32X32X8F16_mac_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_32X32X8F16_mac_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:areg_512_align2 = V_MFMA_F32_32X32X8F16_mac_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ %vdst:areg_512_align2 = V_MFMA_F32_32X32X8F16_mac_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F64_4X4X4F64_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F64_4X4X4F64_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc2:areg_64_align2 = IMPLICIT_DEF
+ %vdst:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_32X32X16_F16_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_32X32X16_F16_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:areg_512_align2 = V_MFMA_F32_32X32X16_F16_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vreg_128_align2 = IMPLICIT_DEF
+ %vsrc1:vreg_128_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vreg_128_align2 = IMPLICIT_DEF
+ %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ %vdst:areg_512_align2 = V_MFMA_F32_32X32X16_F16_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_32X32X16_BF8_BF8_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_32X32X16_BF8_BF8_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:areg_512_align2 = V_MFMA_F32_32X32X16_BF8_BF8_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ %vdst:areg_512_align2 = V_MFMA_F32_32X32X16_BF8_BF8_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_F32_16X16X16F16_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_F32_16X16X16F16_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vdst:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vreg_64_align2 = IMPLICIT_DEF
+ %vsrc2:areg_128_align2 = IMPLICIT_DEF
+ %vdst:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+
+---
+name: test_V_MFMA_I32_32X32X8I8_e64
+body: |
+ ; CHECK-LABEL: name: test_V_MFMA_I32_32X32X8I8_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc0:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc1:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:areg_512_align2 = V_MFMA_I32_32X32X8I8_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc0:vgpr_32 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %ssrc:sreg_64 = IMPLICIT_DEF
+ %vsrc1:vgpr_32 = IMPLICIT_DEF
+ %vsrc2:areg_512_align2 = IMPLICIT_DEF
+ %vdst:areg_512_align2 = V_MFMA_I32_32X32X8I8_e64 %vsrc0, %vsrc1, %vsrc2, 0, 0, 0, implicit $mode, implicit $exec
+ %sdst:sreg_64 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_BRANCH %bb.2
+
+ bb.2: S_ENDPGM 0
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
More information about the llvm-commits mailing list