[llvm] [AMDGPU] Improve VGPR lowering test around FMA[AK|MK]. NFC (PR #170633)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 02:08:52 PST 2025


https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/170633

>From 39e2f308665be1877afcf5b891184b101888311b Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 4 Dec 2025 01:42:10 -0800
Subject: [PATCH] [AMDGPU] Improve VGPR lowering test. NFC

Add asm comment checks for readability.
---
 .../CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir  | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 21f5515b7fb91..a2b5ef7771c09 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -260,50 +260,62 @@ body:             |
     $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr1, undef $vgpr0, undef $vgpr0, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4
     $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4041
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4
     $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4104
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=1 src2=0
     ; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/
     $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x401
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3
     $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x140
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1
     $vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4005
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=1 src2=0
     ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
     $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x554
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=1 src2=1
     ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
     $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x5410
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=1
     ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
     $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x1000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3
     $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_dual_fma_f32 v244 /*v500*/, v6, v7, v8 :: v_dual_add_f32 v3 /*v259*/, v4, v5
     $vgpr500, $vgpr259 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr7, 0, undef $vgpr8, 0, undef $vgpr4, 0, undef $vgpr5, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x40ae
+    ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 src1=3 src2=2
     ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
     $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0xae54
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=1 src2=1
     ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/
     $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec
 
@@ -321,54 +333,67 @@ body:             |
     ; ASM: %bb.0:
 
     ; GCN-NEXT: s_set_vgpr_msb 0x45
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=1 src2=0
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2 /*v258*/, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4505
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=1 src2=0
     ; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1
     $vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x541
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4144
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=1 src2=0
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4451
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=0 src2=1
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
     $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x5111
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=1
     ; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
     $vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x1141
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
     $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4150
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=1
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
     $vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x5051
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=0 src2=1
     ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
     $vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x5101
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
     $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x110
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=1
     ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
     $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x1040
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
     $vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58
     $vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode
 
@@ -396,6 +421,7 @@ body:             |
 
     ; Accumulation instructions apply DST to both the destination and one of the source VGPRs
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_fmac_f32_e64 v0 /*v256*/, |v0|, |v1| clamp mul:4
     $vgpr256 = V_FMAC_F32_e64 2, undef $vgpr0, 2, undef $vgpr1, 2, undef $vgpr256, 1, 2, implicit $mode, implicit $exec
 
@@ -420,18 +446,22 @@ body:             |
     ; DST applies to V0, SRC0 applies to V1, and SRC1 applies to V2.
 
     ; GCN-NEXT: s_set_vgpr_msb 1
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_lshlrev_b32_e64 v0, v0 /*v256*/, v2
     $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr256, undef $vgpr2, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x104
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=1 src2=0
     ; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/
     $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x401
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
     $vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x104
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=1 src2=0
     ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/
     $vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec
 
@@ -448,10 +478,12 @@ body:             |
     ; ASM: %bb.0:
 
     ; GCN-NEXT: s_set_vgpr_msb 0x55
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=1 src2=1
     ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
     $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x5500
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v2
     $vgpr0 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
 
@@ -459,6 +491,7 @@ body:             |
     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v3 /*v259*/, v1
     $vgpr259 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
@@ -466,14 +499,17 @@ body:             |
     $vgpr256 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_fma_f32 v3, v4, v5, s2
     $vgpr3 = V_FMA_F32_e64 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $sgpr2, 0, 0, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 1
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_fma_f32 v3, v4 /*v260*/, v5, 1
     $vgpr3 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr5, 0, 1, 0, 0, implicit $exec, implicit $mode
 
     ; GCN-NEXT: s_set_vgpr_msb 0x104
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=1 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0, v1
     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
@@ -481,6 +517,7 @@ body:             |
     $vgpr2 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr259, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x401
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0, v0 /*v256*/
     ; GCN-NEXT: v_add_nc_u32_e32 v1, v1 /*v257*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x105
@@ -513,6 +550,7 @@ body:             |
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
     ; Reset on fallthrough block end
@@ -522,6 +560,7 @@ body:             |
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_branch
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_BRANCH %bb.3
@@ -533,6 +572,7 @@ body:             |
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_swap_pc_i64
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     $exec = S_SWAPPC_B64 undef $sgpr0_sgpr1
@@ -542,6 +582,7 @@ body:             |
   bb.4:
     ; ASM-NEXT: %bb.4:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_endpgm
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -555,6 +596,7 @@ body:             |
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_set_pc_i64
     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -573,8 +615,10 @@ body:             |
   bb.7:
     ; ASM-NEXT: %bb.7:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; ASM-NEXT: ; return to shader part epilog
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec
@@ -591,8 +635,10 @@ body:             |
   bb.9:
     ; ASM-NEXT: %bb.9:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_set_pc_i64
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_SETPC_B64_return undef $sgpr0_sgpr1, implicit-def $exec
@@ -609,15 +655,19 @@ body:             |
   bb.0:
     ; ASM: %bb.0:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     $vgpr256 = V_MOV_B32_e32 undef $vgpr0, implicit $exec
 
   bb.1:
     ; ASM: .LBB{{[0-9]+}}_1:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_cbranch_scc0
     $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_CBRANCH_SCC0 %bb.1, undef implicit $scc
@@ -625,6 +675,7 @@ body:             |
   bb.2:
     ; ASM: %bb.2:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v2 /*v258*/, v2
     ; GCN-NEXT: s_endpgm
     $vgpr258 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
@@ -640,8 +691,10 @@ body:             |
   bb.0:
     ; ASM: %bb.0:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; ASM:      def v0
     ; GCN-NOT:  s_set_vgpr_msb
     ; ASM:      use v0
@@ -673,9 +726,11 @@ body:             |
   bb.0:
     ; ASM: %bb.0:
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_nop 0
     ; GCN-NEXT: s_set_vgpr_msb 0x4001
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v1, v0 /*v256*/
     BUNDLE implicit-def $vgpr256 {
       $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -700,6 +755,7 @@ body:             |
     ; s_set_vgpr_msb cannot be a first instruction in a clause and must be placed before it.
 
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: s_clause 0x2
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
@@ -718,6 +774,7 @@ body:             |
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v2, v1
     ; GCN-NEXT: v_mov_b32_e32 v3, v1
     BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr1 {
@@ -733,6 +790,7 @@ body:             |
     ; GCN-NEXT: s_clause 0x3
     ; GCN-NEXT: v_mov_b32_e32 v0, v1
     ; GCN-NEXT: s_set_vgpr_msb 64
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
     ; GCN-NEXT: v_mov_b32_e32 v2 /*v258*/, v1
     BUNDLE implicit-def $vgpr0, implicit-def $vgpr257, implicit-def $vgpr248, implicit undef $vgpr1 {
@@ -747,6 +805,7 @@ body:             |
     ; GCN-NEXT: s_clause 0x3e
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_set_vgpr_msb 0x4000
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v1, v1
     ; GCN-NEXT: v_mov_b32_e32 v2, v1
     ; GCN-COUNT-60: v_mov_b32_e32 v1, v1
@@ -847,6 +906,7 @@ body:             |
     ; ASM: %bb.0:
 
     ; GCN: s_set_vgpr_msb 5
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=1 src2=0
     ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/
     $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec
 
@@ -861,10 +921,12 @@ body:             |
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x500
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v1, v2
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 1
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[0:15], v[10:17], v1, v2
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
@@ -873,14 +935,17 @@ body:             |
     V_WMMA_LD_SCALE16_PAIRED_B64 undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x105
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=1 src2=0
     ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v[0:1], v[2:3]
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 0x500
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 src1=0 src2=0
     ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v[0:1], v[2:3]
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
     ; GCN-NEXT: s_set_vgpr_msb 1
+    ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 src1=0 src2=0
     ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[0:15], v[10:17], v[0:1], v[2:3]
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 ...



More information about the llvm-commits mailing list