[llvm] [AMDGPU] Precommit test for issue in amdgpu-rewrite-agpr-copy-mfma, (PR #168609)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 19 14:46:57 PST 2025


================
@@ -0,0 +1,217 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s
+# CHECK: Illegal virtual register for instruction
+# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register
+
+# Test for an issue in amdgpu-rewrite-agpr-copy-mfma where the vgpr_32-constrained
+# scale operand is reassigned to agpr_32, which the instruction format does not permit.
+--- |
+  define amdgpu_kernel void @test() #0 {
+  entry:
+    unreachable
+  }
+  attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+...
+---
+name:            test
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr2, $sgpr3, $vgpr0, $sgpr0_sgpr1
+
+    %0:av_32 = IMPLICIT_DEF
+    %1:av_128_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:av_32 = IMPLICIT_DEF
+    %4:av_32 = IMPLICIT_DEF
+    %5:av_32 = IMPLICIT_DEF
+    %6:vgpr_32 = IMPLICIT_DEF
+    %7:vgpr_32 = IMPLICIT_DEF
+    %8:av_32 = IMPLICIT_DEF
+    %9:vgpr_32 = IMPLICIT_DEF
+    %10:av_32 = IMPLICIT_DEF
+    %11:av_32 = IMPLICIT_DEF
+    %12:av_32 = IMPLICIT_DEF
+    undef %13.sub0:vreg_128_align2 = IMPLICIT_DEF
+    %14:av_128_align2 = IMPLICIT_DEF
+    undef %15.sub0:vreg_128_align2 = IMPLICIT_DEF
+    %16:av_128_align2 = IMPLICIT_DEF
+    undef %17.sub0:av_128_align2 = IMPLICIT_DEF
+    %17.sub1:av_128_align2 = IMPLICIT_DEF
+    %17.sub2:av_128_align2 = IMPLICIT_DEF
+    %17.sub3:av_128_align2 = IMPLICIT_DEF
+    %18:av_128_align2 = IMPLICIT_DEF
+    undef %19.sub0:av_128_align2 = IMPLICIT_DEF
+    %19.sub1:av_128_align2 = IMPLICIT_DEF
+    %19.sub2:av_128_align2 = IMPLICIT_DEF
+    %19.sub3:av_128_align2 = IMPLICIT_DEF
+    %20:av_128_align2 = IMPLICIT_DEF
+    undef %21.sub0:av_128_align2 = IMPLICIT_DEF
+    %21.sub1:av_128_align2 = IMPLICIT_DEF
+    %21.sub2:av_128_align2 = IMPLICIT_DEF
+    %21.sub3:av_128_align2 = IMPLICIT_DEF
+    undef %22.sub0:vreg_128_align2 = IMPLICIT_DEF
+    undef %23.sub0:vreg_128_align2 = IMPLICIT_DEF
+    undef %24.sub0:vreg_128_align2 = IMPLICIT_DEF
+    undef %25.sub0:vreg_128_align2 = IMPLICIT_DEF
+    undef %26.sub0:vreg_128_align2 = IMPLICIT_DEF
+  
+  bb.1:
+    liveins: $sgpr4, $sgpr5, $sgpr10, $sgpr11, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr20_sgpr21:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7:0x00000000000000F0, $sgpr8_sgpr9_sgpr10_sgpr11:0x000000000000000F, $sgpr12_sgpr13_sgpr14_sgpr15:0x00000000000000FF, $sgpr16_sgpr17_sgpr18_sgpr19:0x0000000000000003
+  
+    %27:vgpr_32 = COPY %0
+    %28:vgpr_32 = V_ADD_U32_e32 %9, undef %27, implicit $exec
+    %29:vgpr_32 = IMPLICIT_DEF
+    %30:vgpr_32 = IMPLICIT_DEF
+    %31:vgpr_32 = V_ADD_LSHL_U32_e64 %29, undef $sgpr22, 2, implicit $exec
+    %32:vgpr_32 = V_ADD_U32_e32 undef $sgpr23, %6, implicit $exec
+    %33:av_32 = IMPLICIT_DEF
+    %34:vgpr_32 = IMPLICIT_DEF
+    %35:vgpr_32 = IMPLICIT_DEF
+    %36:vgpr_32 = COPY %3
+    %37:vreg_128_align2 = IMPLICIT_DEF
+    %38:vgpr_32 = COPY %4
+    %39:vreg_128_align2 = IMPLICIT_DEF
+    %40:vreg_128_align2 = IMPLICIT_DEF
+    %41:vreg_128_align2 = IMPLICIT_DEF
+    %42:vreg_128_align2 = IMPLICIT_DEF
+    %43:vreg_128_align2 = IMPLICIT_DEF
+    %44:vreg_128_align2 = IMPLICIT_DEF
+    %45:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %1, undef %14, undef %37, 4, 4, undef %35, undef %38, 8, 12, implicit $mode, implicit $exec
+    %46:av_128_align2 = IMPLICIT_DEF
+    %47:vgpr_32 = COPY %5
+    %48:vreg_128_align2 = IMPLICIT_DEF
+    %49:vreg_128_align2 = IMPLICIT_DEF
+    %50:vreg_128_align2 = IMPLICIT_DEF
+    %51:vreg_128_align2 = IMPLICIT_DEF
+    %52:vreg_128_align2 = IMPLICIT_DEF
+    %53:vreg_128_align2 = IMPLICIT_DEF
+    %54:vreg_128_align2 = IMPLICIT_DEF
+    %55:vreg_128_align2 = IMPLICIT_DEF
+    %56:vreg_128_align2 = IMPLICIT_DEF
+    %57:vreg_128_align2 = IMPLICIT_DEF
+    %58:vreg_128_align2 = IMPLICIT_DEF
+    %59:vreg_128_align2 = IMPLICIT_DEF
+    %60:vreg_128_align2 = IMPLICIT_DEF
+    %61:vreg_128_align2 = IMPLICIT_DEF
+    %62:vreg_128_align2 = IMPLICIT_DEF
+    %63:vreg_128_align2 = IMPLICIT_DEF
+    %64:vgpr_32 = COPY %10
+    %65:vreg_128_align2 = COPY %21
+    %66:vreg_128_align2 = IMPLICIT_DEF
+    %67:vreg_128_align2 = IMPLICIT_DEF
+    %68:vreg_128_align2 = IMPLICIT_DEF
+    %69:vreg_128_align2 = IMPLICIT_DEF
+    %70:vreg_128_align2 = IMPLICIT_DEF
+    %71:vreg_128_align2 = IMPLICIT_DEF
+    %72:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %25, 4, 4, undef %35, undef %36, 0, 0, implicit $mode, implicit $exec
+    %73:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %26, 4, 4, undef %35, undef %36, 0, 4, implicit $mode, implicit $exec
+    %74:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %23, 4, 4, undef %35, undef %38, 0, 0, implicit $mode, implicit $exec
+    %75:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %22, 4, 4, undef %35, undef %38, 0, 4, implicit $mode, implicit $exec
+    %76:vreg_128_align2 = IMPLICIT_DEF
+    %77:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %72, 4, 4, undef %35, undef %36, 8, 8, implicit $mode, implicit $exec
+    %78:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %73, 4, 4, undef %35, undef %36, 8, 12, implicit $mode, implicit $exec
+    %79:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %74, 4, 4, undef %35, undef %38, 8, 8, implicit $mode, implicit $exec
+    %80:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %75, 4, 4, undef %35, undef %38, 8, 12, implicit $mode, implicit $exec
+    %81:vgpr_32 = COPY %11
+    %82:av_128_align2 = IMPLICIT_DEF
+    %83:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, %14, %24, 4, 4, undef %35, %2, 4, 0, implicit $mode, implicit $exec
+    %84:vreg_128_align2 = IMPLICIT_DEF
+    %85:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, undef %83, 4, 4, undef %35, %36, 12, 8, implicit $mode, implicit $exec
+    %86:vreg_128_align2 = IMPLICIT_DEF
+    %87:vreg_128_align2 = IMPLICIT_DEF
+    %88:vreg_128_align2 = IMPLICIT_DEF
+    %89:vreg_128_align2 = IMPLICIT_DEF
+    %90:vreg_128_align2 = IMPLICIT_DEF
+    %91:vgpr_32 = COPY %12
+    %92:vreg_128_align2 = COPY %20
+    %93:vreg_128_align2 = COPY %19
+    %94:vreg_128_align2 = COPY %18
+    %95:vreg_128_align2 = COPY %17
+    %96:vreg_128_align2 = COPY %16
+    %97:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %15, 4, 4, undef %35, undef %38, 0, 0, implicit $mode, implicit $exec
+    %98:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %13, 4, 4, undef %35, undef %38, 0, 4, implicit $mode, implicit $exec
+    %99:vreg_128_align2 = IMPLICIT_DEF
+    %100:vreg_128_align2 = IMPLICIT_DEF
+    %101:vgpr_32 = V_LSHLREV_B32_e32 2, %34, implicit $exec
+    %102:av_32 = IMPLICIT_DEF
+    %4:av_32 = IMPLICIT_DEF
+    %103:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %37, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+    %104:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %39, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+    %105:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %40, 4, 4, undef %35, undef %30, 0, 0, implicit $mode, implicit $exec
+    %106:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %41, 4, 4, undef %35, undef %30, 0, 4, implicit $mode, implicit $exec
+    %107:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %42, 4, 4, undef %35, undef %31, 0, 0, implicit $mode, implicit $exec
+    %108:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %43, 4, 4, undef %35, undef %31, 0, 4, implicit $mode, implicit $exec
+    %109:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %44, 4, 4, undef %35, undef %32, 0, 0, implicit $mode, implicit $exec
+    %110:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %45, 4, 4, undef %35, undef %32, 0, 4, implicit $mode, implicit $exec
+    %111:av_128_align2 = DS_READ_B128_gfx9 %38, -24576, 0, implicit $exec :: (load (s128), addrspace 3)
+    %112:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %48, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+    %113:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %49, 4, 4, undef %35, undef %28, 4, 4, implicit $mode, implicit $exec
+    %114:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %50, 4, 4, undef %35, %30, 4, 0, implicit $mode, implicit $exec
+    %115:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %51, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+    %116:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %52, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+    %117:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %53, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+    %118:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %54, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+    %119:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %55, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+    %120:av_128_align2 = DS_READ_B128_gfx9 %47, -22528, 0, implicit $exec :: (load (s128), addrspace 3)
+    %121:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %56, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+    %122:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %57, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+    %123:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %58, 4, 4, undef %35, undef %30, 0, 0, implicit $mode, implicit $exec
+    %124:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %59, 4, 4, undef %35, undef %30, 0, 4, implicit $mode, implicit $exec
+    %125:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %60, 4, 4, undef %35, undef %31, 0, 0, implicit $mode, implicit $exec
+    %126:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %61, 4, 4, undef %35, undef %31, 0, 4, implicit $mode, implicit $exec
+    %127:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %62, 4, 4, undef %35, undef %32, 0, 0, implicit $mode, implicit $exec
+    %128:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %63, 4, 4, undef %35, undef %32, 0, 4, implicit $mode, implicit $exec
+    %129:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %65, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+    %130:vreg_128_align2 = COPY %46
+    %131:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %66, 4, 4, undef %35, undef %30, 4, 0, implicit $mode, implicit $exec
+    %132:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %67, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+    %133:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %68, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+    %134:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %69, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+    %135:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %70, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+    %136:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %71, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+    %137:av_128_align2 = DS_READ_B128_gfx9 %64, -14336, 0, implicit $exec :: (load (s128), addrspace 3)
+    %138:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %76, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+    %25:vreg_128_align2 = IMPLICIT_DEF
+    %26:vreg_128_align2 = IMPLICIT_DEF
+    %24:vreg_128_align2 = IMPLICIT_DEF
+    %23:vreg_128_align2 = IMPLICIT_DEF
+    %22:vreg_128_align2 = IMPLICIT_DEF
+    %139:av_128_align2 = DS_READ_B128_gfx9 %81, -8192, 0, implicit $exec :: (load (s128), addrspace 3)
+    %140:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %83, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+    %141:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %84, 4, 4, undef %35, undef %28, 4, 4, implicit $mode, implicit $exec
+    %142:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %85, 4, 4, undef %35, undef %30, 4, 0, implicit $mode, implicit $exec
+    %143:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %86, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+    %144:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %87, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+    %145:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %88, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+    %146:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %89, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+    %147:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %90, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+    %148:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %139, undef %14, %98, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+    %149:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %139, undef %14, %99, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+    %150:av_32 = COPY %7
+    %151:vreg_128_align2 = IMPLICIT_DEF
+    %152:vreg_128_align2 = COPY %82
+    %153:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %149, 4, 4, undef %35, undef %28, 8, 12, implicit $mode, implicit $exec
+    %20:av_128_align2 = IMPLICIT_DEF
+    %154:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %151, 4, 4, %35, undef %101, 8, 8, implicit $mode, implicit $exec
----------------
arsenm wrote:

I assume this came out of llvm-reduce? Can you fix all of these undef flags on operands that really do have definitions? 

https://github.com/llvm/llvm-project/pull/168609
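
For context, the failure described in the test comment is that the rewrite pass moves a scale operand, which the MFMA encoding constrains to VGPR_32, into an AGPR_32 register, and the machine verifier then rejects the instruction. Below is a minimal C++ sketch of the kind of operand-class check involved; the helper name operandAllowsRegClass is hypothetical and this is not the pass's actual code, though the MCInstrDesc/TargetRegisterInfo calls are standard LLVM API.

    // Hypothetical helper (sketch only): returns true if assigning NewRC
    // (e.g. AGPR_32) to the register used at operand OpIdx of MI would
    // satisfy the instruction's encoded operand constraint (VGPR_32 for the
    // MFMA scale operands), and false if the rewrite must be skipped.
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    #include "llvm/MC/MCInstrDesc.h"

    static bool operandAllowsRegClass(const llvm::MachineInstr &MI,
                                      unsigned OpIdx,
                                      const llvm::TargetRegisterClass *NewRC,
                                      const llvm::TargetRegisterInfo &TRI) {
      const llvm::MCOperandInfo &OpInfo = MI.getDesc().operands()[OpIdx];
      if (OpInfo.RegClass < 0)
        return true; // No register-class constraint on this operand.
      const llvm::TargetRegisterClass *OpRC = TRI.getRegClass(OpInfo.RegClass);
      // NewRC must be the constrained class or one of its subclasses;
      // AGPR_32 is not a subclass of VGPR_32, so the scale-operand rewrite
      // would be rejected here.
      return OpRC->hasSubClassEq(NewRC);
    }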

