[llvm] [AMDGPU] Precommit test for issue in amdgpu-rewrite-agpr-copy-mfma (PR #168609)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 14:46:57 PST 2025
================
@@ -0,0 +1,217 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s
+# CHECK: Illegal virtual register for instruction
+# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register
+
+# Test for an issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns a scale operand
+# in a vgpr_32 register to agpr_32, which is not permitted by the instruction format.
+--- |
+ define amdgpu_kernel void @test() #0 {
+ entry:
+ unreachable
+ }
+ attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr2, $sgpr3, $vgpr0, $sgpr0_sgpr1
+
+ %0:av_32 = IMPLICIT_DEF
+ %1:av_128_align2 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:av_32 = IMPLICIT_DEF
+ %4:av_32 = IMPLICIT_DEF
+ %5:av_32 = IMPLICIT_DEF
+ %6:vgpr_32 = IMPLICIT_DEF
+ %7:vgpr_32 = IMPLICIT_DEF
+ %8:av_32 = IMPLICIT_DEF
+ %9:vgpr_32 = IMPLICIT_DEF
+ %10:av_32 = IMPLICIT_DEF
+ %11:av_32 = IMPLICIT_DEF
+ %12:av_32 = IMPLICIT_DEF
+ undef %13.sub0:vreg_128_align2 = IMPLICIT_DEF
+ %14:av_128_align2 = IMPLICIT_DEF
+ undef %15.sub0:vreg_128_align2 = IMPLICIT_DEF
+ %16:av_128_align2 = IMPLICIT_DEF
+ undef %17.sub0:av_128_align2 = IMPLICIT_DEF
+ %17.sub1:av_128_align2 = IMPLICIT_DEF
+ %17.sub2:av_128_align2 = IMPLICIT_DEF
+ %17.sub3:av_128_align2 = IMPLICIT_DEF
+ %18:av_128_align2 = IMPLICIT_DEF
+ undef %19.sub0:av_128_align2 = IMPLICIT_DEF
+ %19.sub1:av_128_align2 = IMPLICIT_DEF
+ %19.sub2:av_128_align2 = IMPLICIT_DEF
+ %19.sub3:av_128_align2 = IMPLICIT_DEF
+ %20:av_128_align2 = IMPLICIT_DEF
+ undef %21.sub0:av_128_align2 = IMPLICIT_DEF
+ %21.sub1:av_128_align2 = IMPLICIT_DEF
+ %21.sub2:av_128_align2 = IMPLICIT_DEF
+ %21.sub3:av_128_align2 = IMPLICIT_DEF
+ undef %22.sub0:vreg_128_align2 = IMPLICIT_DEF
+ undef %23.sub0:vreg_128_align2 = IMPLICIT_DEF
+ undef %24.sub0:vreg_128_align2 = IMPLICIT_DEF
+ undef %25.sub0:vreg_128_align2 = IMPLICIT_DEF
+ undef %26.sub0:vreg_128_align2 = IMPLICIT_DEF
+
+ bb.1:
+ liveins: $sgpr4, $sgpr5, $sgpr10, $sgpr11, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr20_sgpr21:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7:0x00000000000000F0, $sgpr8_sgpr9_sgpr10_sgpr11:0x000000000000000F, $sgpr12_sgpr13_sgpr14_sgpr15:0x00000000000000FF, $sgpr16_sgpr17_sgpr18_sgpr19:0x0000000000000003
+
+ %27:vgpr_32 = COPY %0
+ %28:vgpr_32 = V_ADD_U32_e32 %9, undef %27, implicit $exec
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = V_ADD_LSHL_U32_e64 %29, undef $sgpr22, 2, implicit $exec
+ %32:vgpr_32 = V_ADD_U32_e32 undef $sgpr23, %6, implicit $exec
+ %33:av_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = COPY %3
+ %37:vreg_128_align2 = IMPLICIT_DEF
+ %38:vgpr_32 = COPY %4
+ %39:vreg_128_align2 = IMPLICIT_DEF
+ %40:vreg_128_align2 = IMPLICIT_DEF
+ %41:vreg_128_align2 = IMPLICIT_DEF
+ %42:vreg_128_align2 = IMPLICIT_DEF
+ %43:vreg_128_align2 = IMPLICIT_DEF
+ %44:vreg_128_align2 = IMPLICIT_DEF
+ %45:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %1, undef %14, undef %37, 4, 4, undef %35, undef %38, 8, 12, implicit $mode, implicit $exec
+ %46:av_128_align2 = IMPLICIT_DEF
+ %47:vgpr_32 = COPY %5
+ %48:vreg_128_align2 = IMPLICIT_DEF
+ %49:vreg_128_align2 = IMPLICIT_DEF
+ %50:vreg_128_align2 = IMPLICIT_DEF
+ %51:vreg_128_align2 = IMPLICIT_DEF
+ %52:vreg_128_align2 = IMPLICIT_DEF
+ %53:vreg_128_align2 = IMPLICIT_DEF
+ %54:vreg_128_align2 = IMPLICIT_DEF
+ %55:vreg_128_align2 = IMPLICIT_DEF
+ %56:vreg_128_align2 = IMPLICIT_DEF
+ %57:vreg_128_align2 = IMPLICIT_DEF
+ %58:vreg_128_align2 = IMPLICIT_DEF
+ %59:vreg_128_align2 = IMPLICIT_DEF
+ %60:vreg_128_align2 = IMPLICIT_DEF
+ %61:vreg_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_128_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = COPY %10
+ %65:vreg_128_align2 = COPY %21
+ %66:vreg_128_align2 = IMPLICIT_DEF
+ %67:vreg_128_align2 = IMPLICIT_DEF
+ %68:vreg_128_align2 = IMPLICIT_DEF
+ %69:vreg_128_align2 = IMPLICIT_DEF
+ %70:vreg_128_align2 = IMPLICIT_DEF
+ %71:vreg_128_align2 = IMPLICIT_DEF
+ %72:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %25, 4, 4, undef %35, undef %36, 0, 0, implicit $mode, implicit $exec
+ %73:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %26, 4, 4, undef %35, undef %36, 0, 4, implicit $mode, implicit $exec
+ %74:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %23, 4, 4, undef %35, undef %38, 0, 0, implicit $mode, implicit $exec
+ %75:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %22, 4, 4, undef %35, undef %38, 0, 4, implicit $mode, implicit $exec
+ %76:vreg_128_align2 = IMPLICIT_DEF
+ %77:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %72, 4, 4, undef %35, undef %36, 8, 8, implicit $mode, implicit $exec
+ %78:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %73, 4, 4, undef %35, undef %36, 8, 12, implicit $mode, implicit $exec
+ %79:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %74, 4, 4, undef %35, undef %38, 8, 8, implicit $mode, implicit $exec
+ %80:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, %75, 4, 4, undef %35, undef %38, 8, 12, implicit $mode, implicit $exec
+ %81:vgpr_32 = COPY %11
+ %82:av_128_align2 = IMPLICIT_DEF
+ %83:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, %14, %24, 4, 4, undef %35, %2, 4, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %46, undef %14, undef %83, 4, 4, undef %35, %36, 12, 8, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = IMPLICIT_DEF
+ %89:vreg_128_align2 = IMPLICIT_DEF
+ %90:vreg_128_align2 = IMPLICIT_DEF
+ %91:vgpr_32 = COPY %12
+ %92:vreg_128_align2 = COPY %20
+ %93:vreg_128_align2 = COPY %19
+ %94:vreg_128_align2 = COPY %18
+ %95:vreg_128_align2 = COPY %17
+ %96:vreg_128_align2 = COPY %16
+ %97:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %15, 4, 4, undef %35, undef %38, 0, 0, implicit $mode, implicit $exec
+ %98:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %13, 4, 4, undef %35, undef %38, 0, 4, implicit $mode, implicit $exec
+ %99:vreg_128_align2 = IMPLICIT_DEF
+ %100:vreg_128_align2 = IMPLICIT_DEF
+ %101:vgpr_32 = V_LSHLREV_B32_e32 2, %34, implicit $exec
+ %102:av_32 = IMPLICIT_DEF
+ %4:av_32 = IMPLICIT_DEF
+ %103:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %37, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+ %104:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %39, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+ %105:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %40, 4, 4, undef %35, undef %30, 0, 0, implicit $mode, implicit $exec
+ %106:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %41, 4, 4, undef %35, undef %30, 0, 4, implicit $mode, implicit $exec
+ %107:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %42, 4, 4, undef %35, undef %31, 0, 0, implicit $mode, implicit $exec
+ %108:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %43, 4, 4, undef %35, undef %31, 0, 4, implicit $mode, implicit $exec
+ %109:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %44, 4, 4, undef %35, undef %32, 0, 0, implicit $mode, implicit $exec
+ %110:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %45, 4, 4, undef %35, undef %32, 0, 4, implicit $mode, implicit $exec
+ %111:av_128_align2 = DS_READ_B128_gfx9 %38, -24576, 0, implicit $exec :: (load (s128), addrspace 3)
+ %112:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %48, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+ %113:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %49, 4, 4, undef %35, undef %28, 4, 4, implicit $mode, implicit $exec
+ %114:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %50, 4, 4, undef %35, %30, 4, 0, implicit $mode, implicit $exec
+ %115:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %51, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+ %116:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %52, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+ %117:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %53, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+ %118:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %54, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+ %119:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %82, undef %14, %55, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+ %120:av_128_align2 = DS_READ_B128_gfx9 %47, -22528, 0, implicit $exec :: (load (s128), addrspace 3)
+ %121:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %56, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+ %122:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %57, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+ %123:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %58, 4, 4, undef %35, undef %30, 0, 0, implicit $mode, implicit $exec
+ %124:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %59, 4, 4, undef %35, undef %30, 0, 4, implicit $mode, implicit $exec
+ %125:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %60, 4, 4, undef %35, undef %31, 0, 0, implicit $mode, implicit $exec
+ %126:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %61, 4, 4, undef %35, undef %31, 0, 4, implicit $mode, implicit $exec
+ %127:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %62, 4, 4, undef %35, undef %32, 0, 0, implicit $mode, implicit $exec
+ %128:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %63, 4, 4, undef %35, undef %32, 0, 4, implicit $mode, implicit $exec
+ %129:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %65, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+ %130:vreg_128_align2 = COPY %46
+ %131:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %66, 4, 4, undef %35, undef %30, 4, 0, implicit $mode, implicit $exec
+ %132:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %67, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+ %133:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %68, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+ %134:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %69, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+ %135:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %70, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+ %136:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %71, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+ %137:av_128_align2 = DS_READ_B128_gfx9 %64, -14336, 0, implicit $exec :: (load (s128), addrspace 3)
+ %138:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %120, undef %14, %76, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+ %25:vreg_128_align2 = IMPLICIT_DEF
+ %26:vreg_128_align2 = IMPLICIT_DEF
+ %24:vreg_128_align2 = IMPLICIT_DEF
+ %23:vreg_128_align2 = IMPLICIT_DEF
+ %22:vreg_128_align2 = IMPLICIT_DEF
+ %139:av_128_align2 = DS_READ_B128_gfx9 %81, -8192, 0, implicit $exec :: (load (s128), addrspace 3)
+ %140:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %83, 4, 4, undef %35, undef %28, 4, 0, implicit $mode, implicit $exec
+ %141:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %84, 4, 4, undef %35, undef %28, 4, 4, implicit $mode, implicit $exec
+ %142:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %85, 4, 4, undef %35, undef %30, 4, 0, implicit $mode, implicit $exec
+ %143:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %86, 4, 4, undef %35, undef %30, 4, 4, implicit $mode, implicit $exec
+ %144:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %87, 4, 4, undef %35, undef %31, 4, 0, implicit $mode, implicit $exec
+ %145:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %88, 4, 4, undef %35, undef %31, 4, 4, implicit $mode, implicit $exec
+ %146:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %89, 4, 4, undef %35, undef %32, 4, 0, implicit $mode, implicit $exec
+ %147:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %137, undef %14, %90, 4, 4, undef %35, undef %32, 4, 4, implicit $mode, implicit $exec
+ %148:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %139, undef %14, %98, 4, 4, undef %35, undef %28, 0, 0, implicit $mode, implicit $exec
+ %149:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %139, undef %14, %99, 4, 4, undef %35, undef %28, 0, 4, implicit $mode, implicit $exec
+ %150:av_32 = COPY %7
+ %151:vreg_128_align2 = IMPLICIT_DEF
+ %152:vreg_128_align2 = COPY %82
+ %153:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %149, 4, 4, undef %35, undef %28, 8, 12, implicit $mode, implicit $exec
+ %20:av_128_align2 = IMPLICIT_DEF
+ %154:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 undef %111, undef %14, %151, 4, 4, %35, undef %101, 8, 8, implicit $mode, implicit $exec
----------------
arsenm wrote:
I assume this came out of llvm-reduce? Can you fix all of these undef flags on operands that really do have definitions?
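
As an illustration of the kind of cleanup being asked for (a sketch only, not part of the posted patch): %27 is defined by the COPY immediately above its use, yet the use still carries an `undef` flag:

    %27:vgpr_32 = COPY %0
    %28:vgpr_32 = V_ADD_U32_e32 %9, undef %27, implicit $exec

Dropping the spurious flag on operands like this, and keeping `undef` only where no definition actually reaches the use, would presumably leave:

    %28:vgpr_32 = V_ADD_U32_e32 %9, %27, implicit $exec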
https://github.com/llvm/llvm-project/pull/168609