[llvm] [AMDGPU] Avoid putting implicit_def into a bundle that breaks a register's liveness (PR #142563)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 05:10:48 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/142563
>From 3dc03fba1f04940d226459ca41dfc33573efa072 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 3 Jun 2025 17:11:27 +0800
Subject: [PATCH 1/5] avoid putting implicit_def into a bundle that breaks a
register's liveness
---
.../lib/Target/AMDGPU/SIInsertHardClauses.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIPostRABundler.cpp | 3 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll | 940 +++++++++---------
.../CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll | 46 +-
.../AMDGPU/bundle-break-phy-liveness.mir | 31 +
5 files changed, 532 insertions(+), 490 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index eb0977b92d5ab..1f3e549b0e27f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -145,7 +145,7 @@ class SIInsertHardClauses {
// It's safe to treat the rest as illegal.
if (MI.getOpcode() == AMDGPU::S_NOP)
return HARDCLAUSE_INTERNAL;
- if (MI.isMetaInstruction())
+ if (MI.isMetaInstruction() && MI.getOpcode() != AMDGPU::IMPLICIT_DEF)
return HARDCLAUSE_IGNORE;
return HARDCLAUSE_ILLEGAL;
}
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index efdc55b8e68be..48f84286e9214 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -184,7 +184,8 @@ bool SIPostRABundler::run(MachineFunction &MF) {
if (I->getNumExplicitDefs() != 0)
Defs.insert(I->defs().begin()->getReg());
++ClauseLength;
- } else if (!I->isMetaInstruction()) {
+ } else if (!I->isMetaInstruction() ||
+ I->getOpcode() == AMDGPU::IMPLICIT_DEF) {
// Allow meta instructions in between bundle candidates, but do not
// start or end a bundle on one.
//
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index 44abfd272be88..1ab1b5b3b202d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -2,11 +2,11 @@
; FIXME: Currently block machineinstr verifier due to SI BUNDLE pass break physical register liveness. Should remove when the issue is fixed up
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs=0 < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs=0 < %s | FileCheck -check-prefix=VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs=0 < %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define <32 x float> @bitcast_v32i32_to_v32f32(<32 x i32> %a, i32 %b) {
; SI-LABEL: bitcast_v32i32_to_v32f32:
@@ -4334,9 +4334,14 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr59
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
@@ -4438,18 +4443,45 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr59
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr53
+; VI-NEXT: ; implicit-def: $vgpr57
+; VI-NEXT: ; implicit-def: $vgpr56
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr38
+; VI-NEXT: ; implicit-def: $vgpr52
+; VI-NEXT: ; implicit-def: $vgpr47
+; VI-NEXT: ; implicit-def: $vgpr46
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: ; implicit-def: $vgpr51
+; VI-NEXT: ; implicit-def: $vgpr37
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr50
+; VI-NEXT: ; implicit-def: $vgpr63
+; VI-NEXT: ; implicit-def: $vgpr36
+; VI-NEXT: ; implicit-def: $vgpr62
+; VI-NEXT: ; implicit-def: $vgpr61
+; VI-NEXT: ; implicit-def: $vgpr49
+; VI-NEXT: ; implicit-def: $vgpr60
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr55
+; VI-NEXT: ; implicit-def: $vgpr41
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: s_waitcnt vmcnt(6)
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
@@ -4479,43 +4511,12 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr53
-; VI-NEXT: ; implicit-def: $vgpr57
-; VI-NEXT: ; implicit-def: $vgpr56
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: ; implicit-def: $vgpr38
-; VI-NEXT: ; implicit-def: $vgpr52
-; VI-NEXT: ; implicit-def: $vgpr47
-; VI-NEXT: ; implicit-def: $vgpr46
-; VI-NEXT: ; implicit-def: $vgpr45
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: ; implicit-def: $vgpr51
-; VI-NEXT: ; implicit-def: $vgpr37
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr50
-; VI-NEXT: ; implicit-def: $vgpr63
-; VI-NEXT: ; implicit-def: $vgpr36
-; VI-NEXT: ; implicit-def: $vgpr62
-; VI-NEXT: ; implicit-def: $vgpr61
-; VI-NEXT: ; implicit-def: $vgpr49
-; VI-NEXT: ; implicit-def: $vgpr60
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: ; implicit-def: $vgpr55
-; VI-NEXT: ; implicit-def: $vgpr41
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: ; implicit-def: $vgpr58
-; VI-NEXT: s_waitcnt vmcnt(14)
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; VI-NEXT: s_cbranch_execz .LBB12_2
; VI-NEXT: ; %bb.1: ; %cmp.false
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_lshrrev_b32_e32 v33, 8, v32
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v31
@@ -4694,6 +4695,7 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; VI-NEXT: s_cbranch_execz .LBB12_4
; VI-NEXT: ; %bb.3: ; %cmp.true
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_add_u32_e32 v32, vcc, 3, v32
; VI-NEXT: v_add_u32_e32 v31, vcc, 3, v31
; VI-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32]
@@ -5293,9 +5295,16 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr40
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
@@ -5389,14 +5398,12 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; kill: killed $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr56
; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; implicit-def: $vgpr38
@@ -5428,15 +5435,15 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr42
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
@@ -5484,11 +5491,6 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(30)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB12_2
@@ -42059,9 +42061,14 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr59
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
@@ -42163,18 +42170,45 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr59
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr53
+; VI-NEXT: ; implicit-def: $vgpr57
+; VI-NEXT: ; implicit-def: $vgpr56
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr38
+; VI-NEXT: ; implicit-def: $vgpr52
+; VI-NEXT: ; implicit-def: $vgpr47
+; VI-NEXT: ; implicit-def: $vgpr46
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: ; implicit-def: $vgpr51
+; VI-NEXT: ; implicit-def: $vgpr37
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr50
+; VI-NEXT: ; implicit-def: $vgpr63
+; VI-NEXT: ; implicit-def: $vgpr36
+; VI-NEXT: ; implicit-def: $vgpr62
+; VI-NEXT: ; implicit-def: $vgpr61
+; VI-NEXT: ; implicit-def: $vgpr49
+; VI-NEXT: ; implicit-def: $vgpr60
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr55
+; VI-NEXT: ; implicit-def: $vgpr41
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: s_waitcnt vmcnt(6)
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
@@ -42204,43 +42238,12 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr53
-; VI-NEXT: ; implicit-def: $vgpr57
-; VI-NEXT: ; implicit-def: $vgpr56
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: ; implicit-def: $vgpr38
-; VI-NEXT: ; implicit-def: $vgpr52
-; VI-NEXT: ; implicit-def: $vgpr47
-; VI-NEXT: ; implicit-def: $vgpr46
-; VI-NEXT: ; implicit-def: $vgpr45
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: ; implicit-def: $vgpr51
-; VI-NEXT: ; implicit-def: $vgpr37
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr50
-; VI-NEXT: ; implicit-def: $vgpr63
-; VI-NEXT: ; implicit-def: $vgpr36
-; VI-NEXT: ; implicit-def: $vgpr62
-; VI-NEXT: ; implicit-def: $vgpr61
-; VI-NEXT: ; implicit-def: $vgpr49
-; VI-NEXT: ; implicit-def: $vgpr60
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: ; implicit-def: $vgpr55
-; VI-NEXT: ; implicit-def: $vgpr41
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: ; implicit-def: $vgpr58
-; VI-NEXT: s_waitcnt vmcnt(14)
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; VI-NEXT: s_cbranch_execz .LBB36_2
; VI-NEXT: ; %bb.1: ; %cmp.false
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_lshrrev_b32_e32 v33, 8, v32
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v31
@@ -42419,6 +42422,7 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; VI-NEXT: s_cbranch_execz .LBB36_4
; VI-NEXT: ; %bb.3: ; %cmp.true
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_add_f32_e32 v32, 1.0, v32
; VI-NEXT: v_add_f32_e32 v31, 1.0, v31
; VI-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32]
@@ -43018,9 +43022,16 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr40
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
@@ -43114,14 +43125,12 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; kill: killed $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr56
; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; implicit-def: $vgpr38
@@ -43153,15 +43162,15 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr42
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
@@ -43209,11 +43218,6 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(30)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB36_2
@@ -79755,9 +79759,14 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr59
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
@@ -79859,18 +79868,45 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr59
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr53
+; VI-NEXT: ; implicit-def: $vgpr57
+; VI-NEXT: ; implicit-def: $vgpr56
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr38
+; VI-NEXT: ; implicit-def: $vgpr52
+; VI-NEXT: ; implicit-def: $vgpr47
+; VI-NEXT: ; implicit-def: $vgpr46
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: ; implicit-def: $vgpr51
+; VI-NEXT: ; implicit-def: $vgpr37
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr50
+; VI-NEXT: ; implicit-def: $vgpr63
+; VI-NEXT: ; implicit-def: $vgpr36
+; VI-NEXT: ; implicit-def: $vgpr62
+; VI-NEXT: ; implicit-def: $vgpr61
+; VI-NEXT: ; implicit-def: $vgpr49
+; VI-NEXT: ; implicit-def: $vgpr60
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr55
+; VI-NEXT: ; implicit-def: $vgpr41
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: s_waitcnt vmcnt(6)
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr58
@@ -79900,43 +79936,12 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr58
; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr53
-; VI-NEXT: ; implicit-def: $vgpr57
-; VI-NEXT: ; implicit-def: $vgpr56
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: ; implicit-def: $vgpr38
-; VI-NEXT: ; implicit-def: $vgpr52
-; VI-NEXT: ; implicit-def: $vgpr47
-; VI-NEXT: ; implicit-def: $vgpr46
-; VI-NEXT: ; implicit-def: $vgpr45
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: ; implicit-def: $vgpr51
-; VI-NEXT: ; implicit-def: $vgpr37
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr50
-; VI-NEXT: ; implicit-def: $vgpr63
-; VI-NEXT: ; implicit-def: $vgpr36
-; VI-NEXT: ; implicit-def: $vgpr62
-; VI-NEXT: ; implicit-def: $vgpr61
-; VI-NEXT: ; implicit-def: $vgpr49
-; VI-NEXT: ; implicit-def: $vgpr60
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: ; implicit-def: $vgpr55
-; VI-NEXT: ; implicit-def: $vgpr41
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: ; implicit-def: $vgpr58
-; VI-NEXT: s_waitcnt vmcnt(14)
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; VI-NEXT: s_cbranch_execz .LBB56_2
; VI-NEXT: ; %bb.1: ; %cmp.false
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_lshrrev_b32_e32 v33, 8, v32
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v31
@@ -80145,6 +80150,7 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; VI-NEXT: v_addc_u32_e32 v28, vcc, 0, v28, vcc
; VI-NEXT: v_add_u32_e32 v29, vcc, 3, v29
; VI-NEXT: v_addc_u32_e32 v30, vcc, 0, v30, vcc
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_add_u32_e32 v31, vcc, 3, v31
; VI-NEXT: v_addc_u32_e32 v32, vcc, 0, v32, vcc
; VI-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32]
@@ -80714,9 +80720,16 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr40
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
@@ -80810,14 +80823,12 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; kill: killed $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr56
; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; implicit-def: $vgpr38
@@ -80849,15 +80860,15 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; GFX9-NEXT: ; kill: killed $vgpr40
; GFX9-NEXT: ; implicit-def: $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr40
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr42
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; kill: killed $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
@@ -80905,11 +80916,6 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(30)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB56_2
@@ -115517,9 +115523,14 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr57
+; VI-NEXT: ; implicit-def: $vgpr56
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr56
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
@@ -115619,18 +115630,45 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr57
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr56
; VI-NEXT: ; kill: killed $vgpr39
; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr56
+; VI-NEXT: ; implicit-def: $vgpr55
+; VI-NEXT: ; implicit-def: $vgpr47
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr46
+; VI-NEXT: ; implicit-def: $vgpr53
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr52
+; VI-NEXT: ; implicit-def: $vgpr38
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr63
+; VI-NEXT: ; implicit-def: $vgpr62
+; VI-NEXT: ; implicit-def: $vgpr51
+; VI-NEXT: ; implicit-def: $vgpr37
+; VI-NEXT: ; implicit-def: $vgpr61
+; VI-NEXT: ; implicit-def: $vgpr50
+; VI-NEXT: ; implicit-def: $vgpr60
+; VI-NEXT: ; implicit-def: $vgpr59
+; VI-NEXT: ; implicit-def: $vgpr36
+; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr49
+; VI-NEXT: ; kill: killed $vgpr39
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr41
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr56
+; VI-NEXT: s_waitcnt vmcnt(6)
+; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr56
@@ -115663,43 +115701,12 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr56
; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr55
-; VI-NEXT: ; implicit-def: $vgpr47
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr46
-; VI-NEXT: ; implicit-def: $vgpr53
-; VI-NEXT: ; implicit-def: $vgpr45
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: ; implicit-def: $vgpr52
-; VI-NEXT: ; implicit-def: $vgpr38
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr63
-; VI-NEXT: ; implicit-def: $vgpr62
-; VI-NEXT: ; implicit-def: $vgpr51
-; VI-NEXT: ; implicit-def: $vgpr37
-; VI-NEXT: ; implicit-def: $vgpr61
-; VI-NEXT: ; implicit-def: $vgpr50
-; VI-NEXT: ; implicit-def: $vgpr60
-; VI-NEXT: ; implicit-def: $vgpr59
-; VI-NEXT: ; implicit-def: $vgpr36
-; VI-NEXT: ; implicit-def: $vgpr58
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr49
-; VI-NEXT: ; kill: killed $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: ; implicit-def: $vgpr41
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr39
; VI-NEXT: ; implicit-def: $vgpr56
-; VI-NEXT: s_waitcnt vmcnt(14)
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; VI-NEXT: s_cbranch_execz .LBB72_2
; VI-NEXT: ; %bb.1: ; %cmp.false
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v32
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v33, 8, v32
@@ -115878,6 +115885,7 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; VI-NEXT: s_cbranch_execz .LBB72_4
; VI-NEXT: ; %bb.3: ; %cmp.true
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_add_f64 v[31:32], v[31:32], 1.0
; VI-NEXT: v_add_f64 v[29:30], v[29:30], 1.0
; VI-NEXT: v_add_f64 v[27:28], v[27:28], 1.0
@@ -116464,9 +116472,16 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr41
; GFX9-NEXT: ; kill: killed $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr41
@@ -116560,14 +116575,12 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr48
; GFX9-NEXT: ; kill: killed $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr41
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; kill: killed $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; kill: killed $vgpr48
; GFX9-NEXT: ; implicit-def: $vgpr48
; GFX9-NEXT: ; kill: killed $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr41
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: ; implicit-def: $vgpr46
; GFX9-NEXT: ; implicit-def: $vgpr54
; GFX9-NEXT: ; implicit-def: $vgpr45
@@ -116599,15 +116612,13 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr48
; GFX9-NEXT: ; kill: killed $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr41
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr42
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
+; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
@@ -116659,9 +116670,6 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(32)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
-; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB72_2
@@ -164579,12 +164587,16 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr34
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
; VI-NEXT: ; implicit-def: $vgpr38
; VI-NEXT: ; implicit-def: $vgpr55
; VI-NEXT: ; implicit-def: $vgpr54
@@ -164609,52 +164621,11 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: ; implicit-def: $vgpr52
; VI-NEXT: ; implicit-def: $vgpr37
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: ; implicit-def: $vgpr56
; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr56
-; VI-NEXT: ; implicit-def: $vgpr34
-; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: s_waitcnt vmcnt(14)
+; VI-NEXT: s_waitcnt vmcnt(6)
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: ; kill: killed $vgpr33
@@ -164763,6 +164734,43 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr33
; VI-NEXT: ; kill: killed $vgpr33
; VI-NEXT: ; implicit-def: $vgpr33
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr34
+; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; VI-NEXT: s_cbranch_execz .LBB90_2
@@ -164897,6 +164905,7 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; VI-NEXT: v_lshrrev_b64 v[44:45], 24, v[1:2]
; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_lshrrev_b64 v[44:45], 24, v[31:32]
; VI-NEXT: v_lshrrev_b32_e32 v46, 24, v12
; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
@@ -166116,10 +166125,60 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr59
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr59
+; GFX9-NEXT: ; implicit-def: $vgpr59
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr59
+; GFX9-NEXT: ; implicit-def: $vgpr59
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr59
+; GFX9-NEXT: ; implicit-def: $vgpr59
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: ; kill: killed $vgpr59
+; GFX9-NEXT: ; implicit-def: $vgpr59
+; GFX9-NEXT: ; kill: killed $vgpr58
+; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: v_mov_b32_e32 v46, v15
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: ; kill: killed $vgpr59
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
@@ -166129,86 +166188,42 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr59
-; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr59
-; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
-; GFX9-NEXT: ; kill: killed $vgpr59
-; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: v_mov_b32_e32 v47, v16
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr36
-; GFX9-NEXT: ; kill: killed $vgpr59
-; GFX9-NEXT: ; implicit-def: $vgpr59
-; GFX9-NEXT: ; kill: killed $vgpr58
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; implicit-def: $vgpr41
; GFX9-NEXT: ; implicit-def: $vgpr38
; GFX9-NEXT: ; implicit-def: $vgpr51
@@ -166239,15 +166254,25 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr57
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr63
-; GFX9-NEXT: ; kill: killed $vgpr59
-; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr58
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
+; GFX9-NEXT: ; kill: killed $vgpr15
+; GFX9-NEXT: ; implicit-def: $vgpr15
; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
@@ -166303,21 +166328,6 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(34)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
-; GFX9-NEXT: ; kill: killed $vgpr15
-; GFX9-NEXT: ; implicit-def: $vgpr15
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB90_2
@@ -192601,9 +192611,12 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v27
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v25
+; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v23
+; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v14
; VI-NEXT: v_lshrrev_b32_e32 v48, 16, v16
; VI-NEXT: v_lshrrev_b32_e32 v39, 16, v15
-; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v14
; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v12
; VI-NEXT: v_lshrrev_b32_e32 v62, 16, v11
; VI-NEXT: v_lshrrev_b32_e32 v63, 16, v10
@@ -192613,19 +192626,20 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; VI-NEXT: v_lshrrev_b32_e32 v55, 16, v2
; VI-NEXT: v_lshrrev_b32_e32 v56, 16, v28
; VI-NEXT: v_lshrrev_b32_e32 v58, 16, v26
-; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v57, 16, v24
-; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v23
; VI-NEXT: v_lshrrev_b32_e32 v59, 16, v22
; VI-NEXT: v_lshrrev_b32_e32 v38, 16, v21
; VI-NEXT: v_lshrrev_b32_e32 v53, 16, v20
; VI-NEXT: v_lshrrev_b32_e32 v43, 16, v19
; VI-NEXT: v_lshrrev_b32_e32 v41, 16, v18
; VI-NEXT: v_lshrrev_b32_e32 v46, 16, v17
-; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr52
; VI-NEXT: ; implicit-def: $vgpr45
; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr50
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr49
+; VI-NEXT: ; implicit-def: $vgpr35
; VI-NEXT: s_waitcnt vmcnt(13)
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31
; VI-NEXT: ; implicit-def: $vgpr31
@@ -192723,15 +192737,15 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
-; VI-NEXT: s_waitcnt vmcnt(12)
-; VI-NEXT: v_lshrrev_b32_e32 v36, 16, v61
-; VI-NEXT: s_waitcnt vmcnt(11)
-; VI-NEXT: v_lshrrev_b32_e32 v51, 16, v60
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr31
+; VI-NEXT: s_waitcnt vmcnt(14)
+; VI-NEXT: v_lshrrev_b32_e32 v36, 16, v61
+; VI-NEXT: s_waitcnt vmcnt(13)
+; VI-NEXT: v_lshrrev_b32_e32 v51, 16, v60
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr31
@@ -192743,15 +192757,11 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr50
-; VI-NEXT: ; implicit-def: $vgpr31
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr49
-; VI-NEXT: ; implicit-def: $vgpr35
; VI-NEXT: ; implicit-def: $vgpr32
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr32
+; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr32
@@ -193648,9 +193658,17 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr44
+; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
@@ -193710,10 +193728,8 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr57
@@ -193740,41 +193756,14 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr52
; GFX9-NEXT: ; implicit-def: $vgpr51
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr54
-; GFX9-NEXT: ; implicit-def: $vgpr53
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(18)
+; GFX9-NEXT: s_waitcnt vmcnt(8)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
@@ -193833,6 +193822,27 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr54
+; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB94_2
@@ -216309,12 +216319,74 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v27
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v26
+; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v25
+; VI-NEXT: ; kill: killed $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v24
+; VI-NEXT: ; kill: killed $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v23
+; VI-NEXT: ; kill: killed $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v22
+; VI-NEXT: v_lshrrev_b32_e32 v49, 16, v21
+; VI-NEXT: ; kill: killed $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr55
+; VI-NEXT: ; kill: killed $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
; VI-NEXT: v_lshrrev_b32_e32 v60, 16, v16
; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v14
; VI-NEXT: v_lshrrev_b32_e32 v63, 16, v12
@@ -216324,20 +216396,34 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; VI-NEXT: v_lshrrev_b32_e32 v56, 16, v4
; VI-NEXT: v_lshrrev_b32_e32 v57, 16, v2
; VI-NEXT: v_lshrrev_b32_e32 v46, 16, v30
-; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v22
-; VI-NEXT: v_lshrrev_b32_e32 v49, 16, v21
; VI-NEXT: v_lshrrev_b32_e32 v53, 16, v20
; VI-NEXT: v_lshrrev_b32_e32 v51, 16, v19
; VI-NEXT: v_lshrrev_b32_e32 v52, 16, v18
-; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr32
+; VI-NEXT: ; implicit-def: $vgpr62
+; VI-NEXT: ; implicit-def: $vgpr38
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: ; implicit-def: $vgpr61
+; VI-NEXT: ; implicit-def: $vgpr58
+; VI-NEXT: ; implicit-def: $vgpr39
+; VI-NEXT: ; kill: killed $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr35
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr40
+; VI-NEXT: ; implicit-def: $vgpr45
+; VI-NEXT: ; implicit-def: $vgpr44
+; VI-NEXT: ; implicit-def: $vgpr41
; VI-NEXT: s_waitcnt vmcnt(14)
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31
; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v37
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v36
; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v17
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
@@ -216408,7 +216494,6 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
-; VI-NEXT: ; implicit-def: $vgpr32
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
@@ -216418,88 +216503,17 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
; VI-NEXT: ; implicit-def: $vgpr31
+; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v36
; VI-NEXT: ; kill: killed $vgpr31
-; VI-NEXT: ; implicit-def: $vgpr55
; VI-NEXT: ; implicit-def: $vgpr31
; VI-NEXT: ; kill: killed $vgpr31
-; VI-NEXT: ; implicit-def: $vgpr62
; VI-NEXT: ; implicit-def: $vgpr31
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr38
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: ; implicit-def: $vgpr61
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr58
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr39
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; kill: killed $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr35
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; kill: killed $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; kill: killed $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; kill: killed $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: ; kill: killed $vgpr48
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; kill: killed $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr40
-; VI-NEXT: ; implicit-def: $vgpr48
-; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr54
+; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr54
@@ -216508,14 +216522,10 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr45
-; VI-NEXT: ; implicit-def: $vgpr44
-; VI-NEXT: ; implicit-def: $vgpr41
-; VI-NEXT: ; implicit-def: $vgpr54
-; VI-NEXT: ; implicit-def: $vgpr48
; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr48
+; VI-NEXT: ; implicit-def: $vgpr54
; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr48
@@ -217510,9 +217520,17 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr44
+; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
@@ -217572,10 +217590,8 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr50
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: ; implicit-def: $vgpr44
; GFX9-NEXT: ; kill: killed $vgpr50
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: ; implicit-def: $vgpr58
; GFX9-NEXT: ; implicit-def: $vgpr36
; GFX9-NEXT: ; implicit-def: $vgpr57
@@ -217602,41 +217618,14 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr52
; GFX9-NEXT: ; implicit-def: $vgpr51
; GFX9-NEXT: ; implicit-def: $vgpr50
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr53
; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr54
-; GFX9-NEXT: ; implicit-def: $vgpr53
-; GFX9-NEXT: ; implicit-def: $vgpr43
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-NEXT: ; implicit-def: $vgpr43
-; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT: s_waitcnt vmcnt(18)
+; GFX9-NEXT: s_waitcnt vmcnt(8)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
@@ -217695,6 +217684,27 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr33
; GFX9-NEXT: ; kill: killed $vgpr33
; GFX9-NEXT: ; implicit-def: $vgpr33
+; GFX9-NEXT: ; implicit-def: $vgpr54
+; GFX9-NEXT: ; implicit-def: $vgpr53
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-NEXT: ; implicit-def: $vgpr43
+; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB98_2
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
index 397955a8a8928..dc53b27bc95a8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
@@ -65436,8 +65436,28 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
;
; VI-LABEL: bitcast_v32i16_to_v64i8:
; VI: ; %bb.0:
-; VI-NEXT: ; implicit-def: $vgpr19
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr43
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr19
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
@@ -65458,23 +65478,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
; VI-NEXT: ; implicit-def: $vgpr19
-; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr43
; VI-NEXT: ; kill: killed $vgpr17
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
@@ -65500,11 +65503,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
; VI-NEXT: ; implicit-def: $vgpr19
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr41
; VI-NEXT: ; implicit-def: $vgpr18
; VI-NEXT: ; implicit-def: $vgpr54
@@ -65536,6 +65534,8 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr21
; VI-NEXT: ; implicit-def: $vgpr20
; VI-NEXT: ; implicit-def: $vgpr19
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr42
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir b/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
new file mode 100644
index 0000000000000..57476be8bc3c3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
@@ -0,0 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-insert-hard-clauses -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN-CLAUSE %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-post-ra-bundler -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN-BUNDLE %s
+
+---
+name: clause_implicit_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr32
+
+ ; GCN-CLAUSE-LABEL: name: clause_implicit_def
+ ; GCN-CLAUSE: liveins: $vgpr0, $sgpr32
+ ; GCN-CLAUSE-NEXT: {{ $}}
+ ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
+ ; GCN-CLAUSE-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; GCN-CLAUSE-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GCN-BUNDLE-LABEL: name: clause_implicit_def
+ ; GCN-BUNDLE: liveins: $vgpr0, $sgpr32
+ ; GCN-BUNDLE-NEXT: {{ $}}
+ ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
+ ; GCN-BUNDLE-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; GCN-BUNDLE-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = IMPLICIT_DEF
+ $vgpr3 = IMPLICIT_DEF
+ SCRATCH_STORE_DWORDX2_SADDR $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+...
>From f3550df6a05a7ff8bb43d21e6d4bc75cbae8a85a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 6 Jun 2025 12:01:42 +0800
Subject: [PATCH 2/5] Apply comments from Jayfold and Matthew, using
finalizeBundle to fix the problem
---
llvm/lib/CodeGen/MachineInstrBundle.cpp | 17 ++++++-
.../lib/Target/AMDGPU/SIInsertHardClauses.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIPostRABundler.cpp | 3 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll | 46 +++++++++----------
.../AMDGPU/bundle-break-phy-liveness.mir | 21 +++++----
5 files changed, 54 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 34896c67144bc..d23acd25ee85d 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -115,6 +115,21 @@ static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
return DebugLoc();
}
+static bool containRegOrSubReg(SmallSetVector<Register, 32> DefRegs,
+ Register Reg, const TargetRegisterInfo *TRI) {
+ if (DefRegs.contains(Reg))
+ return true;
+ if (Reg.isPhysical()) {
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg)) {
+ // Applying same logic with MachineVerifier that any of the SubReg is
+ // contained, it counts defined
+ if (DefRegs.contains(SubReg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// finalizeBundle - Finalize a machine instruction bundle which includes
/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
/// This routine adds a BUNDLE instruction to represent the bundle, it adds
@@ -151,7 +166,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (LocalDefs.contains(Reg)) {
+ if (containRegOrSubReg(LocalDefs, Reg, TRI)) {
MO.setIsInternalRead();
if (MO.isKill()) {
// Internal def is now killed.
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index bdd4f8bc321b5..d8fe8505bc722 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -151,7 +151,7 @@ class SIInsertHardClauses {
// It's safe to treat the rest as illegal.
if (MI.getOpcode() == AMDGPU::S_NOP)
return HARDCLAUSE_INTERNAL;
- if (MI.isMetaInstruction() && MI.getOpcode() != AMDGPU::IMPLICIT_DEF)
+ if (MI.isMetaInstruction())
return HARDCLAUSE_IGNORE;
return HARDCLAUSE_ILLEGAL;
}
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index 48f84286e9214..efdc55b8e68be 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -184,8 +184,7 @@ bool SIPostRABundler::run(MachineFunction &MF) {
if (I->getNumExplicitDefs() != 0)
Defs.insert(I->defs().begin()->getReg());
++ClauseLength;
- } else if (!I->isMetaInstruction() ||
- I->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ } else if (!I->isMetaInstruction()) {
// Allow meta instructions in between bundle candidates, but do not
// start or end a bundle on one.
//
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
index 4bd36705003e8..1024c2a7f066a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
@@ -65448,28 +65448,8 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
;
; VI-LABEL: bitcast_v32i16_to_v64i8:
; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
-; VI-NEXT: ; implicit-def: $vgpr43
-; VI-NEXT: ; implicit-def: $vgpr42
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr19
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
@@ -65490,6 +65470,23 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
; VI-NEXT: ; implicit-def: $vgpr19
+; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr43
; VI-NEXT: ; kill: killed $vgpr17
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
@@ -65515,6 +65512,11 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr17
; VI-NEXT: ; kill: killed $vgpr19
; VI-NEXT: ; implicit-def: $vgpr19
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; VI-NEXT: ; implicit-def: $vgpr42
+; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr41
; VI-NEXT: ; implicit-def: $vgpr18
; VI-NEXT: ; implicit-def: $vgpr54
@@ -65546,8 +65548,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) {
; VI-NEXT: ; implicit-def: $vgpr21
; VI-NEXT: ; implicit-def: $vgpr20
; VI-NEXT: ; implicit-def: $vgpr19
-; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; VI-NEXT: ; implicit-def: $vgpr42
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir b/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
index 57476be8bc3c3..8045fafc6a6c4 100644
--- a/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
@@ -12,18 +12,23 @@ body: |
; GCN-CLAUSE-LABEL: name: clause_implicit_def
; GCN-CLAUSE: liveins: $vgpr0, $sgpr32
; GCN-CLAUSE-NEXT: {{ $}}
- ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
- ; GCN-CLAUSE-NEXT: $vgpr2 = IMPLICIT_DEF
- ; GCN-CLAUSE-NEXT: $vgpr3 = IMPLICIT_DEF
- ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ ; GCN-CLAUSE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
+ ; GCN-CLAUSE-NEXT: S_CLAUSE 1
+ ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
+ ; GCN-CLAUSE-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; GCN-CLAUSE-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORDX2_SADDR internal $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ ; GCN-CLAUSE-NEXT: }
;
; GCN-BUNDLE-LABEL: name: clause_implicit_def
; GCN-BUNDLE: liveins: $vgpr0, $sgpr32
; GCN-BUNDLE-NEXT: {{ $}}
- ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
- ; GCN-BUNDLE-NEXT: $vgpr2 = IMPLICIT_DEF
- ; GCN-BUNDLE-NEXT: $vgpr3 = IMPLICIT_DEF
- ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ ; GCN-BUNDLE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
+ ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
+ ; GCN-BUNDLE-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; GCN-BUNDLE-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORDX2_SADDR internal $vgpr2_vgpr3, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr
+ ; GCN-BUNDLE-NEXT: }
SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
>From abd5e4bfbaca94898b8a4adb77d1f3fa0b2357b5 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Wed, 25 Jun 2025 16:32:44 +0800
Subject: [PATCH 3/5] Use regunits, address Matthew's comments
---
llvm/lib/CodeGen/MachineInstrBundle.cpp | 36 ++++++++++---------
...ess.mir => bundle-breaks-phy-liveness.mir} | 0
2 files changed, 19 insertions(+), 17 deletions(-)
rename llvm/test/CodeGen/AMDGPU/{bundle-break-phy-liveness.mir => bundle-breaks-phy-liveness.mir} (100%)
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index d23acd25ee85d..f238d3c577cbc 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -115,19 +115,17 @@ static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
return DebugLoc();
}
-static bool containRegOrSubReg(SmallSetVector<Register, 32> DefRegs,
- Register Reg, const TargetRegisterInfo *TRI) {
- if (DefRegs.contains(Reg))
- return true;
+static bool containReg(SmallSetVector<Register, 32> LocalDefsV,
+ SmallSetVector<MCRegUnit, 32> LocalDefsP, Register Reg,
+ const TargetRegisterInfo *TRI) {
if (Reg.isPhysical()) {
- for (const MCPhysReg &SubReg : TRI->subregs(Reg)) {
- // Applying same logic with MachineVerifier that any of the SubReg is
- // contained, it counts defined
- if (DefRegs.contains(SubReg))
- return true;
- }
+ for (MCRegUnit Unit : TRI->regunits(MCRegister::from(Reg.id())))
+ if (!LocalDefsP.contains(Unit))
+ return false;
+
+ return true;
}
- return false;
+ return LocalDefsV.contains(Reg);
}
/// finalizeBundle - Finalize a machine instruction bundle which includes
@@ -150,7 +148,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
- SmallSetVector<Register, 32> LocalDefs;
+ SmallSetVector<Register, 32> LocalDefsV;
+ SmallSetVector<MCRegUnit, 32> LocalDefsP;
SmallSet<Register, 8> DeadDefSet;
SmallSet<Register, 16> KilledDefSet;
SmallSetVector<Register, 8> ExternUses;
@@ -166,7 +165,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (containRegOrSubReg(LocalDefs, Reg, TRI)) {
+ if (containReg(LocalDefsV, LocalDefsP, Reg, TRI)) {
MO.setIsInternalRead();
if (MO.isKill()) {
// Internal def is now killed.
@@ -189,7 +188,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (LocalDefs.insert(Reg)) {
+ if (LocalDefsV.insert(Reg)) {
if (MO.isDead())
DeadDefSet.insert(Reg);
} else {
@@ -201,8 +200,11 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
}
- if (!MO.isDead() && Reg.isPhysical())
- LocalDefs.insert_range(TRI->subregs(Reg));
+ if (!MO.isDead() && Reg.isPhysical()) {
+ LocalDefsV.insert_range(TRI->subregs(Reg));
+ for (MCRegUnit Unit : TRI->regunits(MCRegister::from(Reg.id())))
+ LocalDefsP.insert(Unit);
+ }
}
// Set FrameSetup/FrameDestroy for the bundle. If any of the instructions
@@ -213,7 +215,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.setMIFlag(MachineInstr::FrameDestroy);
}
- for (Register Reg : LocalDefs) {
+ for (Register Reg : LocalDefsV) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.contains(Reg) || KilledDefSet.contains(Reg);
MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir b/llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/bundle-break-phy-liveness.mir
rename to llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir
>From 5f2f332c4945b1100ef83106b915e19192ac4483 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Wed, 25 Jun 2025 20:07:29 +0800
Subject: [PATCH 4/5] fix test case with space
---
.../Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index b23b0ce759d49..85adfc3c22f5a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -274,7 +274,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK-NEXT: LV: vectorizing VPBB:ir-bb<for.body.preheader> in BB:for.body.preheader
+; CHECK-NEXT: LV: vectorizing VPBB:{{[ ]*}}ir-bb<for.body.preheader> in BB:for.body.preheader
; CHECK-NEXT: LV: filled BB:
; CHECK-NEXT: for.body.preheader: ; preds = %entry
; CHECK-NEXT: %0 = zext i32 %n to i64
@@ -683,7 +683,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK-NEXT: LV: vectorizing VPBB:ir-bb<for.body.preheader> in BB:for.body.preheader
+; CHECK-NEXT: LV: vectorizing VPBB:{{[ ]*}}ir-bb<for.body.preheader> in BB:for.body.preheader
; CHECK-NEXT: LV: filled BB:
; CHECK-NEXT: for.body.preheader: ; preds = %entry
; CHECK-NEXT: %0 = zext i32 %n to i64
>From 44d8db67e8e3f0d68eb099cc9306840cc0685282 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 26 Jun 2025 18:11:31 +0800
Subject: [PATCH 5/5] remove subreg, fix tests
---
llvm/lib/CodeGen/MachineInstrBundle.cpp | 9 ++--
.../AMDGPU/bundle-breaks-phy-liveness.mir | 4 +-
.../CodeGen/AMDGPU/hard-clause-limit-attr.mir | 12 ++---
.../test/CodeGen/AMDGPU/hard-clause-limit.mir | 30 ++++++------
.../CodeGen/AMDGPU/hard-clauses-img-gfx11.mir | 4 +-
.../CodeGen/AMDGPU/hard-clauses-img-gfx12.mir | 4 +-
llvm/test/CodeGen/AMDGPU/hard-clauses.mir | 46 +++++++++----------
.../CodeGen/AMDGPU/insert-waitcnts-crash.ll | 2 +-
.../CodeGen/AMDGPU/postra-bundle-memops.mir | 36 +++++++--------
.../postra-bundle-vimage-vsample-gfx12.mir | 4 +-
10 files changed, 75 insertions(+), 76 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index f238d3c577cbc..0a5fef6c23035 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -148,7 +148,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
- SmallSetVector<Register, 32> LocalDefsV;
+ SmallSetVector<Register, 32> LocalDefs;
SmallSetVector<MCRegUnit, 32> LocalDefsP;
SmallSet<Register, 8> DeadDefSet;
SmallSet<Register, 16> KilledDefSet;
@@ -165,7 +165,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (containReg(LocalDefsV, LocalDefsP, Reg, TRI)) {
+ if (containReg(LocalDefs, LocalDefsP, Reg, TRI)) {
MO.setIsInternalRead();
if (MO.isKill()) {
// Internal def is now killed.
@@ -188,7 +188,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (LocalDefsV.insert(Reg)) {
+ if (LocalDefs.insert(Reg)) {
if (MO.isDead())
DeadDefSet.insert(Reg);
} else {
@@ -201,7 +201,6 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
if (!MO.isDead() && Reg.isPhysical()) {
- LocalDefsV.insert_range(TRI->subregs(Reg));
for (MCRegUnit Unit : TRI->regunits(MCRegister::from(Reg.id())))
LocalDefsP.insert(Unit);
}
@@ -215,7 +214,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.setMIFlag(MachineInstr::FrameDestroy);
}
- for (Register Reg : LocalDefsV) {
+ for (Register Reg : LocalDefs) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.contains(Reg) || KilledDefSet.contains(Reg);
MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir b/llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir
index 8045fafc6a6c4..5c7dc8a079345 100644
--- a/llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/bundle-breaks-phy-liveness.mir
@@ -12,7 +12,7 @@ body: |
; GCN-CLAUSE-LABEL: name: clause_implicit_def
; GCN-CLAUSE: liveins: $vgpr0, $sgpr32
; GCN-CLAUSE-NEXT: {{ $}}
- ; GCN-CLAUSE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
+ ; GCN-CLAUSE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
; GCN-CLAUSE-NEXT: S_CLAUSE 1
; GCN-CLAUSE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
; GCN-CLAUSE-NEXT: $vgpr2 = IMPLICIT_DEF
@@ -23,7 +23,7 @@ body: |
; GCN-BUNDLE-LABEL: name: clause_implicit_def
; GCN-BUNDLE: liveins: $vgpr0, $sgpr32
; GCN-BUNDLE-NEXT: {{ $}}
- ; GCN-BUNDLE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
+ ; GCN-BUNDLE-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $sgpr32, implicit $exec, implicit $flat_scr {
; GCN-BUNDLE-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr
; GCN-BUNDLE-NEXT: $vgpr2 = IMPLICIT_DEF
; GCN-BUNDLE-NEXT: $vgpr3 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clause-limit-attr.mir b/llvm/test/CodeGen/AMDGPU/hard-clause-limit-attr.mir
index cfd4ea155316a..fb3f328f9d58c 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clause-limit-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clause-limit-attr.mir
@@ -18,7 +18,7 @@ body: |
; GFX11-LABEL: name: long_clause_32
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 31
; GFX11-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -96,7 +96,7 @@ body: |
; GFX11-LABEL: name: long_clause_7
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 6
; GFX11-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -106,7 +106,7 @@ body: |
; GFX11-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 6
; GFX11-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec
@@ -116,7 +116,7 @@ body: |
; GFX11-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 6
; GFX11-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec
@@ -126,7 +126,7 @@ body: |
; GFX11-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 6
; GFX11-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec
@@ -136,7 +136,7 @@ body: |
; GFX11-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 3
; GFX11-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clause-limit.mir b/llvm/test/CodeGen/AMDGPU/hard-clause-limit.mir
index 98221c21eee2e..c00e25cfbcbff 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clause-limit.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clause-limit.mir
@@ -14,7 +14,7 @@ body: |
; GFX11-32-LABEL: name: long_clause
; GFX11-32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-32-NEXT: {{ $}}
- ; GFX11-32-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-32-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-32-NEXT: S_CLAUSE 31
; GFX11-32-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-32-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -53,7 +53,7 @@ body: |
; GFX11-16-LABEL: name: long_clause
; GFX11-16: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-16-NEXT: {{ $}}
- ; GFX11-16-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-16-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-16-NEXT: S_CLAUSE 15
; GFX11-16-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-16-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -72,7 +72,7 @@ body: |
; GFX11-16-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec
; GFX11-16-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec
; GFX11-16-NEXT: }
- ; GFX11-16-NEXT: BUNDLE implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-16-NEXT: BUNDLE implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-16-NEXT: S_CLAUSE 15
; GFX11-16-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec
; GFX11-16-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec
@@ -95,7 +95,7 @@ body: |
; GFX11-10-LABEL: name: long_clause
; GFX11-10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-10-NEXT: {{ $}}
- ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-10-NEXT: S_CLAUSE 9
; GFX11-10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -108,7 +108,7 @@ body: |
; GFX11-10-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec
; GFX11-10-NEXT: }
- ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-10-NEXT: S_CLAUSE 9
; GFX11-10-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec
@@ -121,7 +121,7 @@ body: |
; GFX11-10-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec
; GFX11-10-NEXT: }
- ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-10-NEXT: S_CLAUSE 9
; GFX11-10-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec
@@ -134,7 +134,7 @@ body: |
; GFX11-10-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec
; GFX11-10-NEXT: }
- ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-10-NEXT: BUNDLE implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-10-NEXT: S_CLAUSE 1
; GFX11-10-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec
; GFX11-10-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec
@@ -143,56 +143,56 @@ body: |
; GFX11-4-LABEL: name: long_clause
; GFX11-4: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-4-NEXT: {{ $}}
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec
; GFX11-4-NEXT: }
- ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-4-NEXT: BUNDLE implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-4-NEXT: S_CLAUSE 3
; GFX11-4-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec
; GFX11-4-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx11.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx11.mir
index 0829cabe56fe1..7e1055b2a28a4 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx11.mir
@@ -11,7 +11,7 @@ body: |
; CHECK-LABEL: name: mimg_nsa
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr11_vgpr12, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr8, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr20_vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr8, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx11 $vgpr3, $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx11 $vgpr3, $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
@@ -29,7 +29,7 @@ body: |
; CHECK-LABEL: name: mimg_nsa_mixed
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr8, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr14, implicit-def $vgpr20_vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr8, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 {
; CHECK-NEXT: S_CLAUSE 2
; CHECK-NEXT: $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx11 $vgpr3, $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx11 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx12.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx12.mir
index 243a84562ab30..9689dda9932ed 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx12.mir
@@ -10,7 +10,7 @@ body: |
; CHECK-LABEL: name: mimg
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr11_vgpr12, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr20_vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr10_vgpr11_vgpr12 = IMAGE_SAMPLE_LZ_V3_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
; CHECK-NEXT: $vgpr20_vgpr21_vgpr22 = IMAGE_SAMPLE_LZ_V3_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
@@ -28,7 +28,7 @@ body: |
; CHECK-LABEL: name: mimg_mixed
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5, implicit $vgpr6 {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr14, implicit-def $vgpr20_vgpr21_vgpr22, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5, implicit $vgpr6 {
; CHECK-NEXT: S_CLAUSE 2
; CHECK-NEXT: $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx12 $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
index 44b988a7121c7..1341a5916df4b 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -39,7 +39,7 @@ body: |
; CHECK-LABEL: name: nop2
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; CHECK-NEXT: S_CLAUSE 2
; CHECK-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK-NEXT: S_NOP 2
@@ -49,7 +49,7 @@ body: |
; GFX11-LABEL: name: nop2
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; GFX11-NEXT: S_CLAUSE 2
; GFX11-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX11-NEXT: S_NOP 2
@@ -59,7 +59,7 @@ body: |
; GFX12-LABEL: name: nop2
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; GFX12-NEXT: S_CLAUSE 2
; GFX12-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX12-NEXT: S_NOP 2
@@ -79,7 +79,7 @@ body: |
; CHECK-LABEL: name: nop3
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; CHECK-NEXT: S_CLAUSE 2
; CHECK-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK-NEXT: S_NOP 2
@@ -90,7 +90,7 @@ body: |
; GFX11-LABEL: name: nop3
; GFX11: liveins: $sgpr0_sgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; GFX11-NEXT: S_CLAUSE 2
; GFX11-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX11-NEXT: S_NOP 2
@@ -101,7 +101,7 @@ body: |
; GFX12-LABEL: name: nop3
; GFX12: liveins: $sgpr0_sgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
; GFX12-NEXT: S_CLAUSE 2
; GFX12-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX12-NEXT: S_NOP 2
@@ -123,7 +123,7 @@ body: |
; CHECK-LABEL: name: long_clause
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, 
implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit-def $vgpr33, implicit-def $vgpr33_lo16, implicit-def $vgpr33_hi16, implicit-def $vgpr34, implicit-def $vgpr34_lo16, implicit-def $vgpr34_hi16, implicit-def $vgpr35, implicit-def $vgpr35_lo16, implicit-def $vgpr35_hi16, implicit-def $vgpr36, implicit-def $vgpr36_lo16, implicit-def $vgpr36_hi16, implicit-def $vgpr37, implicit-def $vgpr37_lo16, implicit-def $vgpr37_hi16, implicit-def $vgpr38, implicit-def $vgpr38_lo16, implicit-def $vgpr38_hi16, implicit-def $vgpr39, implicit-def $vgpr39_lo16, implicit-def $vgpr39_hi16, implicit-def $vgpr40, implicit-def $vgpr40_lo16, implicit-def $vgpr40_hi16, implicit-def $vgpr41, implicit-def $vgpr41_lo16, implicit-def $vgpr41_hi16, implicit-def $vgpr42, implicit-def $vgpr42_lo16, implicit-def $vgpr42_hi16, implicit-def $vgpr43, implicit-def $vgpr43_lo16, implicit-def $vgpr43_hi16, implicit-def $vgpr44, implicit-def $vgpr44_lo16, implicit-def $vgpr44_hi16, implicit-def $vgpr45, implicit-def $vgpr45_lo16, implicit-def $vgpr45_hi16, implicit-def $vgpr46, implicit-def $vgpr46_lo16, implicit-def $vgpr46_hi16, implicit-def $vgpr47, implicit-def $vgpr47_lo16, implicit-def $vgpr47_hi16, implicit-def $vgpr48, implicit-def $vgpr48_lo16, implicit-def $vgpr48_hi16, implicit-def $vgpr49, implicit-def $vgpr49_lo16, implicit-def $vgpr49_hi16, implicit-def $vgpr50, implicit-def $vgpr50_lo16, implicit-def $vgpr50_hi16, implicit-def $vgpr51, implicit-def $vgpr51_lo16, implicit-def $vgpr51_hi16, implicit-def $vgpr52, implicit-def $vgpr52_lo16, implicit-def $vgpr52_hi16, implicit-def $vgpr53, implicit-def 
$vgpr53_lo16, implicit-def $vgpr53_hi16, implicit-def $vgpr54, implicit-def $vgpr54_lo16, implicit-def $vgpr54_hi16, implicit-def $vgpr55, implicit-def $vgpr55_lo16, implicit-def $vgpr55_hi16, implicit-def $vgpr56, implicit-def $vgpr56_lo16, implicit-def $vgpr56_hi16, implicit-def $vgpr57, implicit-def $vgpr57_lo16, implicit-def $vgpr57_hi16, implicit-def $vgpr58, implicit-def $vgpr58_lo16, implicit-def $vgpr58_hi16, implicit-def $vgpr59, implicit-def $vgpr59_lo16, implicit-def $vgpr59_hi16, implicit-def $vgpr60, implicit-def $vgpr60_lo16, implicit-def $vgpr60_hi16, implicit-def $vgpr61, implicit-def $vgpr61_lo16, implicit-def $vgpr61_hi16, implicit-def $vgpr62, implicit-def $vgpr62_lo16, implicit-def $vgpr62_hi16, implicit-def $vgpr63, implicit-def $vgpr63_lo16, implicit-def $vgpr63_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit-def $vgpr33, implicit-def $vgpr34, implicit-def $vgpr35, implicit-def $vgpr36, implicit-def $vgpr37, implicit-def $vgpr38, implicit-def $vgpr39, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; CHECK-NEXT: S_CLAUSE 62
; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -189,7 +189,7 @@ body: |
; CHECK-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 248, 0, 0, implicit $exec
; CHECK-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, implicit $exec
; CHECK-NEXT: }
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr64, implicit-def $vgpr64_lo16, implicit-def $vgpr64_hi16, implicit-def $vgpr65, implicit-def $vgpr65_lo16, implicit-def $vgpr65_hi16, implicit-def $vgpr66, implicit-def $vgpr66_lo16, implicit-def $vgpr66_hi16, implicit-def $vgpr67, implicit-def $vgpr67_lo16, implicit-def $vgpr67_hi16, implicit-def $vgpr68, implicit-def $vgpr68_lo16, implicit-def $vgpr68_hi16, implicit-def $vgpr69, implicit-def $vgpr69_lo16, implicit-def $vgpr69_hi16, implicit-def $vgpr70, implicit-def $vgpr70_lo16, implicit-def $vgpr70_hi16, implicit-def $vgpr71, implicit-def $vgpr71_lo16, implicit-def $vgpr71_hi16, implicit-def $vgpr72, implicit-def $vgpr72_lo16, implicit-def $vgpr72_hi16, implicit-def $vgpr73, implicit-def $vgpr73_lo16, implicit-def $vgpr73_hi16, implicit-def $vgpr74, implicit-def $vgpr74_lo16, implicit-def $vgpr74_hi16, implicit-def $vgpr75, implicit-def $vgpr75_lo16, implicit-def $vgpr75_hi16, implicit-def $vgpr76, implicit-def $vgpr76_lo16, implicit-def $vgpr76_hi16, implicit-def $vgpr77, implicit-def $vgpr77_lo16, implicit-def $vgpr77_hi16, implicit-def $vgpr78, implicit-def $vgpr78_lo16, implicit-def $vgpr78_hi16, implicit-def $vgpr79, implicit-def $vgpr79_lo16, implicit-def $vgpr79_hi16, implicit-def $vgpr80, implicit-def $vgpr80_lo16, implicit-def $vgpr80_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66, implicit-def $vgpr67, implicit-def $vgpr68, implicit-def $vgpr69, implicit-def $vgpr70, implicit-def $vgpr71, implicit-def $vgpr72, implicit-def $vgpr73, implicit-def $vgpr74, implicit-def $vgpr75, implicit-def $vgpr76, implicit-def $vgpr77, implicit-def $vgpr78, implicit-def $vgpr79, implicit-def $vgpr80, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; CHECK-NEXT: S_CLAUSE 16
; CHECK-NEXT: $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, implicit $exec
; CHECK-NEXT: $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, implicit $exec
@@ -213,7 +213,7 @@ body: |
; GFX11-LABEL: name: long_clause
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, 
implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 31
; GFX11-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -248,7 +248,7 @@ body: |
; GFX11-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr33, implicit-def $vgpr33_lo16, implicit-def $vgpr33_hi16, implicit-def $vgpr34, implicit-def $vgpr34_lo16, implicit-def $vgpr34_hi16, implicit-def $vgpr35, implicit-def $vgpr35_lo16, implicit-def $vgpr35_hi16, implicit-def $vgpr36, implicit-def $vgpr36_lo16, implicit-def $vgpr36_hi16, implicit-def $vgpr37, implicit-def $vgpr37_lo16, implicit-def $vgpr37_hi16, implicit-def $vgpr38, implicit-def $vgpr38_lo16, implicit-def $vgpr38_hi16, implicit-def $vgpr39, implicit-def $vgpr39_lo16, implicit-def $vgpr39_hi16, implicit-def $vgpr40, implicit-def $vgpr40_lo16, implicit-def $vgpr40_hi16, implicit-def $vgpr41, implicit-def $vgpr41_lo16, implicit-def $vgpr41_hi16, implicit-def $vgpr42, implicit-def $vgpr42_lo16, implicit-def $vgpr42_hi16, implicit-def $vgpr43, implicit-def $vgpr43_lo16, implicit-def $vgpr43_hi16, implicit-def $vgpr44, implicit-def $vgpr44_lo16, implicit-def $vgpr44_hi16, implicit-def $vgpr45, implicit-def $vgpr45_lo16, implicit-def $vgpr45_hi16, implicit-def $vgpr46, implicit-def $vgpr46_lo16, implicit-def $vgpr46_hi16, implicit-def $vgpr47, implicit-def $vgpr47_lo16, implicit-def $vgpr47_hi16, implicit-def $vgpr48, implicit-def $vgpr48_lo16, implicit-def $vgpr48_hi16, implicit-def $vgpr49, implicit-def $vgpr49_lo16, implicit-def $vgpr49_hi16, implicit-def $vgpr50, implicit-def $vgpr50_lo16, implicit-def $vgpr50_hi16, implicit-def $vgpr51, implicit-def $vgpr51_lo16, implicit-def $vgpr51_hi16, implicit-def $vgpr52, implicit-def $vgpr52_lo16, implicit-def $vgpr52_hi16, implicit-def $vgpr53, implicit-def $vgpr53_lo16, implicit-def $vgpr53_hi16, implicit-def $vgpr54, implicit-def $vgpr54_lo16, implicit-def $vgpr54_hi16, implicit-def $vgpr55, implicit-def $vgpr55_lo16, implicit-def $vgpr55_hi16, implicit-def $vgpr56, implicit-def $vgpr56_lo16, implicit-def $vgpr56_hi16, implicit-def $vgpr57, implicit-def $vgpr57_lo16, implicit-def $vgpr57_hi16, implicit-def $vgpr58, implicit-def $vgpr58_lo16, implicit-def $vgpr58_hi16, 
implicit-def $vgpr59, implicit-def $vgpr59_lo16, implicit-def $vgpr59_hi16, implicit-def $vgpr60, implicit-def $vgpr60_lo16, implicit-def $vgpr60_hi16, implicit-def $vgpr61, implicit-def $vgpr61_lo16, implicit-def $vgpr61_hi16, implicit-def $vgpr62, implicit-def $vgpr62_lo16, implicit-def $vgpr62_hi16, implicit-def $vgpr63, implicit-def $vgpr63_lo16, implicit-def $vgpr63_hi16, implicit-def $vgpr64, implicit-def $vgpr64_lo16, implicit-def $vgpr64_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr33, implicit-def $vgpr34, implicit-def $vgpr35, implicit-def $vgpr36, implicit-def $vgpr37, implicit-def $vgpr38, implicit-def $vgpr39, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 31
; GFX11-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, implicit $exec
@@ -283,7 +283,7 @@ body: |
; GFX11-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, implicit $exec
; GFX11-NEXT: }
- ; GFX11-NEXT: BUNDLE implicit-def $vgpr65, implicit-def $vgpr65_lo16, implicit-def $vgpr65_hi16, implicit-def $vgpr66, implicit-def $vgpr66_lo16, implicit-def $vgpr66_hi16, implicit-def $vgpr67, implicit-def $vgpr67_lo16, implicit-def $vgpr67_hi16, implicit-def $vgpr68, implicit-def $vgpr68_lo16, implicit-def $vgpr68_hi16, implicit-def $vgpr69, implicit-def $vgpr69_lo16, implicit-def $vgpr69_hi16, implicit-def $vgpr70, implicit-def $vgpr70_lo16, implicit-def $vgpr70_hi16, implicit-def $vgpr71, implicit-def $vgpr71_lo16, implicit-def $vgpr71_hi16, implicit-def $vgpr72, implicit-def $vgpr72_lo16, implicit-def $vgpr72_hi16, implicit-def $vgpr73, implicit-def $vgpr73_lo16, implicit-def $vgpr73_hi16, implicit-def $vgpr74, implicit-def $vgpr74_lo16, implicit-def $vgpr74_hi16, implicit-def $vgpr75, implicit-def $vgpr75_lo16, implicit-def $vgpr75_hi16, implicit-def $vgpr76, implicit-def $vgpr76_lo16, implicit-def $vgpr76_hi16, implicit-def $vgpr77, implicit-def $vgpr77_lo16, implicit-def $vgpr77_hi16, implicit-def $vgpr78, implicit-def $vgpr78_lo16, implicit-def $vgpr78_hi16, implicit-def $vgpr79, implicit-def $vgpr79_lo16, implicit-def $vgpr79_hi16, implicit-def $vgpr80, implicit-def $vgpr80_lo16, implicit-def $vgpr80_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr65, implicit-def $vgpr66, implicit-def $vgpr67, implicit-def $vgpr68, implicit-def $vgpr69, implicit-def $vgpr70, implicit-def $vgpr71, implicit-def $vgpr72, implicit-def $vgpr73, implicit-def $vgpr74, implicit-def $vgpr75, implicit-def $vgpr76, implicit-def $vgpr77, implicit-def $vgpr78, implicit-def $vgpr79, implicit-def $vgpr80, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX11-NEXT: S_CLAUSE 15
; GFX11-NEXT: $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, implicit $exec
; GFX11-NEXT: $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, implicit $exec
@@ -306,7 +306,7 @@ body: |
; GFX12-LABEL: name: long_clause
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, 
implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31, implicit-def $vgpr32, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX12-NEXT: S_CLAUSE 31
; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec
; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec
@@ -341,7 +341,7 @@ body: |
; GFX12-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec
; GFX12-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec
; GFX12-NEXT: }
- ; GFX12-NEXT: BUNDLE implicit-def $vgpr33, implicit-def $vgpr33_lo16, implicit-def $vgpr33_hi16, implicit-def $vgpr34, implicit-def $vgpr34_lo16, implicit-def $vgpr34_hi16, implicit-def $vgpr35, implicit-def $vgpr35_lo16, implicit-def $vgpr35_hi16, implicit-def $vgpr36, implicit-def $vgpr36_lo16, implicit-def $vgpr36_hi16, implicit-def $vgpr37, implicit-def $vgpr37_lo16, implicit-def $vgpr37_hi16, implicit-def $vgpr38, implicit-def $vgpr38_lo16, implicit-def $vgpr38_hi16, implicit-def $vgpr39, implicit-def $vgpr39_lo16, implicit-def $vgpr39_hi16, implicit-def $vgpr40, implicit-def $vgpr40_lo16, implicit-def $vgpr40_hi16, implicit-def $vgpr41, implicit-def $vgpr41_lo16, implicit-def $vgpr41_hi16, implicit-def $vgpr42, implicit-def $vgpr42_lo16, implicit-def $vgpr42_hi16, implicit-def $vgpr43, implicit-def $vgpr43_lo16, implicit-def $vgpr43_hi16, implicit-def $vgpr44, implicit-def $vgpr44_lo16, implicit-def $vgpr44_hi16, implicit-def $vgpr45, implicit-def $vgpr45_lo16, implicit-def $vgpr45_hi16, implicit-def $vgpr46, implicit-def $vgpr46_lo16, implicit-def $vgpr46_hi16, implicit-def $vgpr47, implicit-def $vgpr47_lo16, implicit-def $vgpr47_hi16, implicit-def $vgpr48, implicit-def $vgpr48_lo16, implicit-def $vgpr48_hi16, implicit-def $vgpr49, implicit-def $vgpr49_lo16, implicit-def $vgpr49_hi16, implicit-def $vgpr50, implicit-def $vgpr50_lo16, implicit-def $vgpr50_hi16, implicit-def $vgpr51, implicit-def $vgpr51_lo16, implicit-def $vgpr51_hi16, implicit-def $vgpr52, implicit-def $vgpr52_lo16, implicit-def $vgpr52_hi16, implicit-def $vgpr53, implicit-def $vgpr53_lo16, implicit-def $vgpr53_hi16, implicit-def $vgpr54, implicit-def $vgpr54_lo16, implicit-def $vgpr54_hi16, implicit-def $vgpr55, implicit-def $vgpr55_lo16, implicit-def $vgpr55_hi16, implicit-def $vgpr56, implicit-def $vgpr56_lo16, implicit-def $vgpr56_hi16, implicit-def $vgpr57, implicit-def $vgpr57_lo16, implicit-def $vgpr57_hi16, implicit-def $vgpr58, implicit-def $vgpr58_lo16, implicit-def $vgpr58_hi16, 
implicit-def $vgpr59, implicit-def $vgpr59_lo16, implicit-def $vgpr59_hi16, implicit-def $vgpr60, implicit-def $vgpr60_lo16, implicit-def $vgpr60_hi16, implicit-def $vgpr61, implicit-def $vgpr61_lo16, implicit-def $vgpr61_hi16, implicit-def $vgpr62, implicit-def $vgpr62_lo16, implicit-def $vgpr62_hi16, implicit-def $vgpr63, implicit-def $vgpr63_lo16, implicit-def $vgpr63_hi16, implicit-def $vgpr64, implicit-def $vgpr64_lo16, implicit-def $vgpr64_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr33, implicit-def $vgpr34, implicit-def $vgpr35, implicit-def $vgpr36, implicit-def $vgpr37, implicit-def $vgpr38, implicit-def $vgpr39, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX12-NEXT: S_CLAUSE 31
; GFX12-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec
; GFX12-NEXT: $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, implicit $exec
@@ -376,7 +376,7 @@ body: |
; GFX12-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, implicit $exec
; GFX12-NEXT: $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, implicit $exec
; GFX12-NEXT: }
- ; GFX12-NEXT: BUNDLE implicit-def $vgpr65, implicit-def $vgpr65_lo16, implicit-def $vgpr65_hi16, implicit-def $vgpr66, implicit-def $vgpr66_lo16, implicit-def $vgpr66_hi16, implicit-def $vgpr67, implicit-def $vgpr67_lo16, implicit-def $vgpr67_hi16, implicit-def $vgpr68, implicit-def $vgpr68_lo16, implicit-def $vgpr68_hi16, implicit-def $vgpr69, implicit-def $vgpr69_lo16, implicit-def $vgpr69_hi16, implicit-def $vgpr70, implicit-def $vgpr70_lo16, implicit-def $vgpr70_hi16, implicit-def $vgpr71, implicit-def $vgpr71_lo16, implicit-def $vgpr71_hi16, implicit-def $vgpr72, implicit-def $vgpr72_lo16, implicit-def $vgpr72_hi16, implicit-def $vgpr73, implicit-def $vgpr73_lo16, implicit-def $vgpr73_hi16, implicit-def $vgpr74, implicit-def $vgpr74_lo16, implicit-def $vgpr74_hi16, implicit-def $vgpr75, implicit-def $vgpr75_lo16, implicit-def $vgpr75_hi16, implicit-def $vgpr76, implicit-def $vgpr76_lo16, implicit-def $vgpr76_hi16, implicit-def $vgpr77, implicit-def $vgpr77_lo16, implicit-def $vgpr77_hi16, implicit-def $vgpr78, implicit-def $vgpr78_lo16, implicit-def $vgpr78_hi16, implicit-def $vgpr79, implicit-def $vgpr79_lo16, implicit-def $vgpr79_hi16, implicit-def $vgpr80, implicit-def $vgpr80_lo16, implicit-def $vgpr80_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr65, implicit-def $vgpr66, implicit-def $vgpr67, implicit-def $vgpr68, implicit-def $vgpr69, implicit-def $vgpr70, implicit-def $vgpr71, implicit-def $vgpr72, implicit-def $vgpr73, implicit-def $vgpr74, implicit-def $vgpr75, implicit-def $vgpr76, implicit-def $vgpr77, implicit-def $vgpr78, implicit-def $vgpr79, implicit-def $vgpr80, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; GFX12-NEXT: S_CLAUSE 15
; GFX12-NEXT: $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, implicit $exec
; GFX12-NEXT: $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, implicit $exec
@@ -486,7 +486,7 @@ body: |
; CHECK-LABEL: name: kill
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK-NEXT: KILL undef renamable $sgpr4
@@ -496,7 +496,7 @@ body: |
; GFX11-LABEL: name: kill
; GFX11: liveins: $sgpr0_sgpr1, $sgpr4
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; GFX11-NEXT: S_CLAUSE 1
; GFX11-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX11-NEXT: KILL undef renamable $sgpr4
@@ -506,7 +506,7 @@ body: |
; GFX12-LABEL: name: kill
; GFX12: liveins: $sgpr0_sgpr1, $sgpr4
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; GFX12-NEXT: S_CLAUSE 1
; GFX12-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX12-NEXT: KILL undef renamable $sgpr4
@@ -526,7 +526,7 @@ body: |
; CHECK-LABEL: name: kill2
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK-NEXT: KILL undef renamable $sgpr4
@@ -537,7 +537,7 @@ body: |
; GFX11-LABEL: name: kill2
; GFX11: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; GFX11-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; GFX11-NEXT: S_CLAUSE 1
; GFX11-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX11-NEXT: KILL undef renamable $sgpr4
@@ -548,7 +548,7 @@ body: |
; GFX12-LABEL: name: kill2
; GFX12: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; GFX12-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
; GFX12-NEXT: S_CLAUSE 1
; GFX12-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GFX12-NEXT: KILL undef renamable $sgpr4
@@ -570,7 +570,7 @@ body: |
; CHECK-LABEL: name: flat_load_atomic
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr, implicit $vgpr2 {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr3, implicit-def $vgpr4, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr, implicit $vgpr2 {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 4, 0, implicit $exec, implicit $flat_scr
@@ -600,7 +600,7 @@ body: |
; CHECK-LABEL: name: global_load_atomic
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $vgpr2 {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr3, implicit-def $vgpr4, implicit $vgpr0_vgpr1, implicit $exec, implicit $vgpr2 {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; CHECK-NEXT: $vgpr4 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 4, 0, implicit $exec
@@ -657,7 +657,7 @@ body: |
; CHECK-LABEL: name: buffer_load_atomic
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $exec, implicit $vgpr0 {
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $exec, implicit $vgpr0 {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = BUFFER_ATOMIC_ADD_OFFSET_RTN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 4, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 1f518386c63d5..8bb6f3ffdf208 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -17,7 +17,7 @@ define fastcc i32 @foo() {
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17
; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
- ; CHECK-NEXT: BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $scc {
+ ; CHECK-NEXT: BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $scc, implicit-def $sgpr17 {
; CHECK-NEXT: $sgpr16_sgpr17 = S_GETPC_B64
; CHECK-NEXT: $sgpr16 = S_ADD_U32 internal $sgpr16, target-flags(amdgpu-gotprel32-lo) @bar + 4, implicit-def $scc
; CHECK-NEXT: $sgpr17 = S_ADDC_U32 internal $sgpr17, target-flags(amdgpu-gotprel32-hi) @bar + 12, implicit-def $scc, implicit internal $scc
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index 458afca384911..1e6c00a54a012 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -10,13 +10,13 @@ body: |
; GCN-LABEL: name: bundle_memops
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit undef $vgpr3_vgpr4, implicit $exec {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit undef $vgpr3_vgpr4, implicit $exec {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN-NEXT: }
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr5, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 {
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, implicit $exec
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
@@ -35,7 +35,7 @@ body: |
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: S_NOP 0
; GCN-NEXT: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN-NEXT: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -44,11 +44,11 @@ body: |
; GCN-NEXT: DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
; GCN-NEXT: }
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 {
+ ; GCN-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 {
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0
; GCN-NEXT: }
- ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 {
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -56,7 +56,7 @@ body: |
; GCN-NEXT: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: }
- ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
; GCN-NEXT: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
; GCN-NEXT: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
; GCN-NEXT: }
@@ -68,7 +68,7 @@ body: |
; GCN-NEXT: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0
; GCN-NEXT: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN-NEXT: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -124,7 +124,7 @@ body: |
; GCN-LABEL: name: bundle_dbg_value_0
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: DBG_VALUE $vgpr0, 0, 0
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
@@ -145,7 +145,7 @@ body: |
; GCN-LABEL: name: bundle_dbg_value_1
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr2, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: DBG_VALUE $vgpr0, 0, 0
; GCN-NEXT: DBG_VALUE $vgpr1, 0, 0
@@ -170,7 +170,7 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: DBG_VALUE $vgpr1, 0, 0
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr2, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: DBG_VALUE $vgpr0, 0, 0
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
@@ -193,7 +193,7 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr2, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: KILL $vgpr1
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
@@ -215,7 +215,7 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr2, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: KILL internal $vgpr0
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
@@ -239,7 +239,7 @@ body: |
; GCN-LABEL: name: post_bundle_kill
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -258,7 +258,7 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -280,7 +280,7 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -300,7 +300,7 @@ body: |
; GCN-LABEL: name: post_bundle_multi_kill_0
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -320,7 +320,7 @@ body: |
; GCN-LABEL: name: post_bundle_multi_kill_1
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN-NEXT: }
@@ -341,7 +341,7 @@ body: |
; GCN-LABEL: name: post_bundle_kill_and_null_reg_dbginfo
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN-NEXT: DBG_VALUE $noreg, $noreg
; GCN-NEXT: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-vimage-vsample-gfx12.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-vimage-vsample-gfx12.mir
index 6ab8a343dd96b..5fea0aee72ec7 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-vimage-vsample-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-vimage-vsample-gfx12.mir
@@ -9,7 +9,7 @@ body: |
; GFX12-LABEL: name: post_bundle_vimage
; GFX12: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr5, implicit-def $vgpr4, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
; GFX12-NEXT: $vgpr5 = IMAGE_LOAD_V1_V1_gfx12 $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, 1, 0, 0, -1, 0, 0, implicit $exec :: (dereferenceable invariant load (s32), addrspace 8)
; GFX12-NEXT: $vgpr4 = IMAGE_LOAD_V1_V1_gfx12 killed $vgpr1, killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, -1, 0, 0, implicit $exec :: (dereferenceable invariant load (s32), addrspace 8)
; GFX12-NEXT: }
@@ -25,7 +25,7 @@ body: |
; GFX12-LABEL: name: post_bundle_vsample
; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: BUNDLE implicit-def $vgpr6_vgpr7_vgpr8_vgpr9, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr6_vgpr7, implicit-def $vgpr6_vgpr7_vgpr8, implicit-def $vgpr7_vgpr8, implicit-def $vgpr7_vgpr8_vgpr9, implicit-def $vgpr8_vgpr9, implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit killed $vgpr0, implicit killed $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit killed $vgpr2, implicit killed $vgpr3 {
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr6_vgpr7_vgpr8_vgpr9, implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit killed $vgpr0, implicit killed $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit killed $vgpr2, implicit killed $vgpr3 {
; GFX12-NEXT: $vgpr6_vgpr7_vgpr8_vgpr9 = IMAGE_SAMPLE_V4_V2_gfx12 killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
; GFX12-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_V4_V2_gfx12 killed $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
; GFX12-NEXT: }
More information about the llvm-commits
mailing list