[llvm] [LLVM] Insert IMPLICIT_DEF for a register sequence if any operand is undef (PR #158000)
Abhinav Garg via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 23:14:59 PDT 2025
https://github.com/abhigargrepo created https://github.com/llvm/llvm-project/pull/158000
Currently, live interval analysis is unable to track the undefined sub-parts of a register tuple.
This patch inserts an IMPLICIT_DEF for such tuples in the two-address instruction pass so that their live intervals are tracked correctly.
>From 16604511e62088373dcd20123eefb9c0d10ae61a Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Tue, 9 Sep 2025 09:25:33 -0700
Subject: [PATCH 1/2] Fixing run lines to make test case simple
---
.../AMDGPU/llvm.amdgcn.image.sample.noret.ll | 182 +++--
.../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll | 707 ++++++++++++++++--
2 files changed, 746 insertions(+), 143 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
index 4873b42a235e3..c905e38cba443 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -296,29 +296,53 @@ main_body:
}
define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
-; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
-; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
-; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
-; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
-; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
+; GFX10-SDAG-LABEL: sample_nortn_mix_3:
+; GFX10-SDAG: ; %bb.0: ; %main_body
+; GFX10-SDAG-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: ; return to shader part epilog
;
-; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
-; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
-; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
-; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
+; GFX10-GISEL-LABEL: sample_nortn_mix_3:
+; GFX10-GISEL: ; %bb.0: ; %main_body
+; GFX10-GISEL-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: sample_nortn_mix_3:
+; GFX11-SDAG: ; %bb.0: ; %main_body
+; GFX11-SDAG-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: sample_nortn_mix_3:
+; GFX11-GISEL: ; %bb.0: ; %main_body
+; GFX11-GISEL-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_3:
; GFX12-SDAG: ; %bb.0: ; %main_body
@@ -352,39 +376,73 @@ main_body:
}
define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
-; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
-; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
-; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
-; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
-; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2)
-; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
+; GFX10-SDAG-LABEL: sample_nortn_mix_4:
+; GFX10-SDAG: ; %bb.0: ; %main_body
+; GFX10-SDAG-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: sample_nortn_mix_4:
+; GFX10-GISEL: ; %bb.0: ; %main_body
+; GFX10-GISEL-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-LABEL: sample_nortn_mix_4:
+; GFX11-SDAG: ; %bb.0: ; %main_body
+; GFX11-SDAG-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: ; return to shader part epilog
;
-; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
-; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
-; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
-; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2)
-; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
+; GFX11-GISEL-LABEL: sample_nortn_mix_4:
+; GFX11-GISEL: ; %bb.0: ; %main_body
+; GFX11-GISEL-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_4:
; GFX12-SDAG: ; %bb.0: ; %main_body
@@ -477,12 +535,8 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
-; GFX10-GISEL: {{.*}}
-; GFX10-SDAG: {{.*}}
; GFX11: {{.*}}
-; GFX11-GISEL: {{.*}}
; GFX11-GISEL-FAKE16: {{.*}}
-; GFX11-SDAG: {{.*}}
; GFX11-SDAG-FAKE16: {{.*}}
; GFX11-SDAG-TRUE16: {{.*}}
; GFX12-GISEL-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
index e7d8683137dd5..03158f1141969 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
@@ -1,186 +1,733 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10PLUS %s
-; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10PLUS %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GFX10PLUS %s
-; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GFX10PLUS %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GFX10PLUS %s
-; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GFX10PLUS %s
-
-; GFX10PLUS-LABEL: {{^}}dpp8_test:
-; GFX10PLUS: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
-; GFX10PLUS: v_mov_b32_dpp [[SRC]], [[SRC]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX12,GFX12-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX12,GFX12-GISEL %s
+
define amdgpu_kernel void @dpp8_test(ptr addrspace(1) %out, i32 %in) {
+; GFX10-LABEL: dpp8_test:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_test:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_test:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_wait_states:
-; GFX10PLUS-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
-; GFX10PLUS: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
-; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
-; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[5,0,0,0,0,0,0,0]{{$}}
define amdgpu_kernel void @dpp8_wait_states(ptr addrspace(1) %out, i32 %in) {
+; GFX10-LABEL: dpp8_wait_states:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[5,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_wait_states:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[5,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_wait_states:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_mov_b32_e32 v0, s2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[5,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_endpgm
%tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
%tmp1 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %tmp0, i32 5) #0
store i32 %tmp1, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_i64:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_i64(i64 %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_i64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_i64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_i64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_i64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_i64:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_i64:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call i64 @llvm.amdgcn.mov.dpp8.i64(i64 %in, i32 1)
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v2i32:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_v2i32(<2 x i32> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v2i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v2i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v2i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v2i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v2i32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v2i32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <2 x i32> @llvm.amdgcn.mov.dpp8.v3i32(<2 x i32> %in, i32 1)
store <2 x i32> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v3i32:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx3|b96}} v[3:4], v[0:2], off
define amdgpu_ps void @dpp8_v3i32(<3 x i32> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v3i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v3i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v3i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v3i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v3i32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v3i32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <3 x i32> @llvm.amdgcn.mov.dpp8.v3i32(<3 x i32> %in, i32 1)
store <3 x i32> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v4i32:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx4|b128}} v[4:5], v[0:3], off
define amdgpu_ps void @dpp8_v4i32(<4 x i32> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v4i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v4i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v4i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v4i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v4i32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v4i32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b128 v[4:5], v[0:3], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <4 x i32> @llvm.amdgcn.mov.dpp8.v3i32(<4 x i32> %in, i32 1)
store <4 x i32> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_p0:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_p0(ptr %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_p0:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_p0:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_p0:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_p0:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_p0:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_p0:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call ptr @llvm.amdgcn.mov.dpp8.p0(ptr %in, i32 1)
store ptr %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_p3:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dword|b32}} v[1:2], v0, off
define amdgpu_ps void @dpp8_p3(ptr addrspace(3) %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_p3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_p3:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_p3:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b32 v[1:2], v0, off
+; GFX12-NEXT: s_endpgm
%tmp0 = call ptr addrspace(3) @llvm.amdgcn.mov.dpp8.v3p3(ptr addrspace(3) %in, i32 1)
store ptr addrspace(3) %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v3p3:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx3|b96}} v[3:4], v[0:2], off
define amdgpu_ps void @dpp8_v3p3(<3 x ptr addrspace(3)> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v3p3:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v3p3:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v3p3:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v3p3:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v3p3:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v3p3:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <3 x ptr addrspace(3)> @llvm.amdgcn.mov.dpp8.v3p3(<3 x ptr addrspace(3)> %in, i32 1)
store <3 x ptr addrspace(3)> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_i16:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{short|b16}} v[1:2], v0, off
define amdgpu_ps void @dpp8_i16(i16 %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b16 v[1:2], v0, off
+; GFX12-NEXT: s_endpgm
%tmp0 = call i16 @llvm.amdgcn.mov.dpp8.i16(i16 %in, i32 1)
store i16 %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v4i16:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_v4i16(<4 x i16> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v4i16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v4i16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v4i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v4i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v4i16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v4i16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <4 x i16> @llvm.amdgcn.mov.dpp8.v4i16(<4 x i16> %in, i32 1)
store <4 x i16> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v4f16:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_v4f16(<4 x half> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v4f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v4f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v4f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v4f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v4f16:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v4f16:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <4 x half> @llvm.amdgcn.mov.dpp8.v4f16(<4 x half> %in, i32 1)
store <4 x half> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_float:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dword|b32}} v[1:2], v0, off
define amdgpu_ps void @dpp8_float(float %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_float:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_float:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_float:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b32 v[1:2], v0, off
+; GFX12-NEXT: s_endpgm
%tmp0 = call float @llvm.amdgcn.mov.dpp8.f32(float %in, i32 1)
store float %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v3f32:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx3|b96}} v[3:4], v[0:2], off
define amdgpu_ps void @dpp8_v3f32(<3 x float> %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_v3f32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_v3f32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[3:4], v[0:2], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_v3f32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_v3f32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_v3f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_v3f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v2, v2 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b96 v[3:4], v[0:2], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call <3 x float> @llvm.amdgcn.mov.dpp8.v3f32(<3 x float> %in, i32 1)
store <3 x float> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_half:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{short|b16}} v[1:2], v0, off
define amdgpu_ps void @dpp8_half(half %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_half:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_half:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_half:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b16 v[1:2], v0, off
+; GFX12-NEXT: s_endpgm
%tmp0 = call half @llvm.amdgcn.mov.dpp8.f16(half %in, i32 1)
store half %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_bfloat:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{short|b16}} v[1:2], v0, off
define amdgpu_ps void @dpp8_bfloat(bfloat %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_bfloat:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_bfloat:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_bfloat:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b16 v[1:2], v0, off
+; GFX12-NEXT: s_endpgm
%tmp0 = call bfloat @llvm.amdgcn.mov.dpp8.bf16(bfloat %in, i32 1)
store bfloat %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_v4bf16:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_v4bf16(<4 x bfloat> %in, ptr addrspace(1) %out) {
+; GFX10-LABEL: dpp8_v4bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: dpp8_v4bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: dpp8_v4bf16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-NEXT: s_endpgm
%tmp0 = call <4 x bfloat> @llvm.amdgcn.mov.dpp8.v4bf16(<4 x bfloat> %in, i32 1)
store <4 x bfloat> %tmp0, ptr addrspace(1) %out
ret void
}
-; GFX10PLUS-LABEL: {{^}}dpp8_double:
-; GFX10PLUS-DAG: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
-; GFX10PLUS-DAG: global_store_{{dwordx2|b64}} v[2:3], v[0:1], off
define amdgpu_ps void @dpp8_double(double %in, ptr addrspace(1) %out) {
+; GFX10-SDAG-LABEL: dpp8_double:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: dpp8_double:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: dpp8_double:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_double:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: dpp8_double:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: dpp8_double:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: v_mov_b32_dpp v1, v1 dpp8:[1,0,0,0,0,0,0,0]
+; GFX12-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX12-GISEL-NEXT: s_endpgm
%tmp0 = call double @llvm.amdgcn.mov.dpp8.f64(double %in, i32 1)
store double %tmp0, ptr addrspace(1) %out
ret void
@@ -189,3 +736,5 @@ define amdgpu_ps void @dpp8_double(double %in, ptr addrspace(1) %out) {
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
attributes #0 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10PLUS: {{.*}}
>From 5db4954374a81d6e1216d2581b4d5f3e6280347f Mon Sep 17 00:00:00 2001
From: abhigargrepo <abhigarg at amd.com>
Date: Mon, 21 Jul 2025 07:32:43 -0700
Subject: [PATCH 2/2] Insert IMPLICIT_DEF for a register sequence if any operand is undef
---
.../lib/CodeGen/TwoAddressInstructionPass.cpp | 13 +-
.../AMDGPU/GlobalISel/mul-known-bits.i64.ll | 126 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll | 64 +-
.../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 2181 ++++++-------
.../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 1265 ++++----
.../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 2752 +++++++++--------
.../GlobalISel/twoaddr-extract-dyn-v7f64.mir | 43 +-
.../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 4 +
.../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll | 1096 +++----
.../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 4 +
.../CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll | 283 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll | 33 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll | 65 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll | 5 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll | 15 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll | 150 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll | 19 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll | 120 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll | 12 +-
.../CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll | 32 +-
.../atomic_optimizations_global_pointer.ll | 79 +-
.../atomic_optimizations_local_pointer.ll | 57 +-
llvm/test/CodeGen/AMDGPU/bf16.ll | 114 +-
...ffer-fat-pointers-contents-legalization.ll | 8 +-
.../build-vector-packed-partial-undef.ll | 206 +-
llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 20 +-
.../CodeGen/AMDGPU/div-rem-by-constant-64.ll | 123 +-
llvm/test/CodeGen/AMDGPU/div_i128.ll | 94 +-
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 249 +-
.../AMDGPU/divergence-driven-buildvector.ll | 5 +-
.../early-lis-two-address-partial-def.mir | 9 +-
llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll | 203 +-
llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll | 142 +-
llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll | 2 +
llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 40 +-
llvm/test/CodeGen/AMDGPU/frem.ll | 84 +-
.../AMDGPU/gfx-callable-argument-types.ll | 82 +-
llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll | 114 +-
llvm/test/CodeGen/AMDGPU/idot4s.ll | 51 +-
llvm/test/CodeGen/AMDGPU/idot4u.ll | 15 +-
.../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 7 +-
.../CodeGen/AMDGPU/integer-mad-patterns.ll | 661 ++--
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll | 252 +-
.../llvm.amdgcn.image.gather4.a16.dim.ll | 85 +-
.../AMDGPU/llvm.amdgcn.image.msaa.load.ll | 7 +-
.../llvm.amdgcn.image.sample.a16.dim.ll | 583 +++-
.../llvm.amdgcn.image.sample.g16.encode.ll | 181 +-
.../AMDGPU/llvm.amdgcn.image.sample.g16.ll | 181 +-
.../AMDGPU/llvm.amdgcn.image.sample.noret.ll | 54 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll | 45 +-
.../CodeGen/AMDGPU/llvm.amdgcn.permlane.ll | 132 +-
.../CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll | 20 +-
.../llvm.amdgcn.raw.ptr.buffer.store.bf16.ll | 4 +-
.../AMDGPU/llvm.amdgcn.waitcnt.out.order.ll | 20 +-
.../CodeGen/AMDGPU/llvm.amdgcn.writelane.ll | 24 +-
llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll | 6 +-
llvm/test/CodeGen/AMDGPU/llvm.round.ll | 13 +-
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 1739 +++++------
llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 1166 +++----
llvm/test/CodeGen/AMDGPU/load-constant-i8.ll | 1374 ++++----
llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 122 +-
llvm/test/CodeGen/AMDGPU/load-global-i8.ll | 1014 +++---
llvm/test/CodeGen/AMDGPU/load-local-i16.ll | 250 +-
llvm/test/CodeGen/AMDGPU/lround.ll | 16 +-
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll | 32 +-
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 30 +-
llvm/test/CodeGen/AMDGPU/mad_64_32.ll | 130 +-
llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 125 +-
.../AMDGPU/reassoc-mul-add-1-to-mad.ll | 36 +-
.../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll | 12 +-
llvm/test/CodeGen/AMDGPU/scratch-simple.ll | 316 +-
.../AMDGPU/sext-in-reg-vector-shuffle.ll | 2 +
.../AMDGPU/shufflevector.v2f32.v2f32.ll | 125 +-
.../AMDGPU/shufflevector.v2f32.v3f32.ll | 128 +-
.../AMDGPU/shufflevector.v2f32.v4f32.ll | 108 +-
.../AMDGPU/shufflevector.v2f32.v8f32.ll | 216 +-
.../AMDGPU/shufflevector.v2i32.v2i32.ll | 125 +-
.../AMDGPU/shufflevector.v2i32.v3i32.ll | 128 +-
.../AMDGPU/shufflevector.v2i32.v4i32.ll | 108 +-
.../AMDGPU/shufflevector.v2i32.v8i32.ll | 216 +-
.../AMDGPU/shufflevector.v2i64.v2i64.ll | 153 +-
.../AMDGPU/shufflevector.v2i64.v3i64.ll | 144 +-
.../AMDGPU/shufflevector.v2i64.v4i64.ll | 156 +-
.../AMDGPU/shufflevector.v2i64.v8i64.ll | 492 ++-
.../CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll | 153 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll | 144 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll | 156 +-
.../CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll | 125 +-
.../CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll | 128 +-
.../CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll | 108 +-
.../CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll | 216 +-
.../AMDGPU/shufflevector.v3bf16.v3bf16.ll | 9 +
.../AMDGPU/shufflevector.v3bf16.v4bf16.ll | 9 +
.../AMDGPU/shufflevector.v3f16.v3f16.ll | 9 +
.../AMDGPU/shufflevector.v3f16.v4f16.ll | 9 +
.../AMDGPU/shufflevector.v3f32.v2f32.ll | 316 +-
.../AMDGPU/shufflevector.v3f32.v3f32.ll | 483 +--
.../AMDGPU/shufflevector.v3f32.v4f32.ll | 409 ++-
.../AMDGPU/shufflevector.v3i16.v3i16.ll | 9 +
.../AMDGPU/shufflevector.v3i16.v4i16.ll | 9 +
.../AMDGPU/shufflevector.v3i32.v2i32.ll | 316 +-
.../AMDGPU/shufflevector.v3i32.v3i32.ll | 483 +--
.../AMDGPU/shufflevector.v3i32.v4i32.ll | 409 ++-
.../AMDGPU/shufflevector.v3i64.v2i64.ll | 313 +-
.../AMDGPU/shufflevector.v3i64.v3i64.ll | 329 +-
.../AMDGPU/shufflevector.v3i64.v4i64.ll | 460 ++-
.../CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll | 313 +-
.../CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll | 329 +-
.../CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll | 460 ++-
.../CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll | 316 +-
.../CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll | 483 +--
.../CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll | 409 ++-
.../AMDGPU/shufflevector.v4bf16.v3bf16.ll | 75 +-
.../AMDGPU/shufflevector.v4bf16.v4bf16.ll | 75 +-
.../AMDGPU/shufflevector.v4f16.v3f16.ll | 75 +-
.../AMDGPU/shufflevector.v4f16.v4f16.ll | 75 +-
.../AMDGPU/shufflevector.v4f32.v2f32.ll | 164 +-
.../AMDGPU/shufflevector.v4f32.v3f32.ll | 1095 ++++---
.../AMDGPU/shufflevector.v4f32.v4f32.ll | 1128 ++++---
.../AMDGPU/shufflevector.v4i16.v3i16.ll | 75 +-
.../AMDGPU/shufflevector.v4i16.v4i16.ll | 75 +-
.../AMDGPU/shufflevector.v4i32.v2i32.ll | 164 +-
.../AMDGPU/shufflevector.v4i32.v3i32.ll | 1095 ++++---
.../AMDGPU/shufflevector.v4i32.v4i32.ll | 1128 ++++---
.../AMDGPU/shufflevector.v4i64.v2i64.ll | 384 ++-
.../AMDGPU/shufflevector.v4i64.v3i64.ll | 776 +++--
.../AMDGPU/shufflevector.v4i64.v4i64.ll | 890 ++++--
.../CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll | 384 ++-
.../CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll | 776 +++--
.../CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll | 890 ++++--
.../CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll | 164 +-
.../CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll | 1095 ++++---
.../CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll | 1128 ++++---
.../AMDGPU/subreg-coalescer-undef-use.ll | 11 +-
llvm/test/CodeGen/AMDGPU/uniform-select.ll | 4 +
llvm/test/CodeGen/AMDGPU/vector-reduce-mul.ll | 747 +++--
.../test/CodeGen/AMDGPU/vector-reduce-smax.ll | 414 ++-
.../test/CodeGen/AMDGPU/vector-reduce-smin.ll | 414 ++-
.../test/CodeGen/AMDGPU/vector_rebroadcast.ll | 7 +
.../CodeGen/AMDGPU/vector_shuffle.packed.ll | 31 +-
...wmma-gfx12-w64-f16-f32-matrix-modifiers.ll | 76 +-
142 files changed, 24298 insertions(+), 16752 deletions(-)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8d94b40a41bea..7e161bfb39ac1 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1988,6 +1988,7 @@ void TwoAddressInstructionImpl::eliminateRegSequence(
SmallVector<Register, 4> OrigRegs;
VNInfo *DefVN = nullptr;
+ bool DefEmitted = false;
if (LIS) {
OrigRegs.push_back(MI.getOperand(0).getReg());
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
@@ -1998,9 +1999,17 @@ void TwoAddressInstructionImpl::eliminateRegSequence(
.valueOut();
}
}
-
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).isUndef()) {
+ // An undef source leaves part of DstReg undefined; emit an IMPLICIT_DEF of DstReg first so its live interval is tracked.
+ MachineInstr *DefMI =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DstReg);
+ MBBI = DefMI;
+ DefEmitted = true;
+ break;
+ }
LaneBitmask UndefLanes = LaneBitmask::getNone();
- bool DefEmitted = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
Register SrcReg = UseMO.getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
index 1cd9c0bfeb7e6..84247841691ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
@@ -8,37 +8,40 @@ define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspac
; GFX10-LABEL: v_mul_i64_no_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
-; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v8, 3, v0
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_dwordx2 v[0:1], v7, s[0:1]
-; GFX10-NEXT: global_load_dwordx2 v[2:3], v7, s[2:3]
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v8, s[0:1]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v8, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, v0, v2, 0
-; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, v0, v3, v[5:6]
+; GFX10-NEXT: v_mov_b32_e32 v6, v5
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, v0, v3, v[6:7]
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v2, v[5:6]
; GFX10-NEXT: v_mov_b32_e32 v5, v0
-; GFX10-NEXT: global_store_dwordx2 v7, v[4:5], s[2:3]
+; GFX10-NEXT: global_store_dwordx2 v8, v[4:5], s[2:3]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_mul_i64_no_zext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v9, 3, v0
+; GFX11-NEXT: v_lshlrev_b32_e32 v10, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: global_load_b64 v[0:1], v9, s[0:1]
-; GFX11-NEXT: global_load_b64 v[2:3], v9, s[2:3]
+; GFX11-NEXT: global_load_b64 v[0:1], v10, s[0:1]
+; GFX11-NEXT: global_load_b64 v[2:3], v10, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v3, v[5:6]
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v1, v2, v[6:7]
+; GFX11-NEXT: v_mov_b32_e32 v6, v5
+; GFX11-NEXT: v_mad_u64_u32 v[8:9], null, v0, v3, v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v5, v7
-; GFX11-NEXT: global_store_b64 v9, v[4:5], s[2:3]
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v1, v2, v[8:9]
+; GFX11-NEXT: global_store_b64 v10, v[4:5], s[2:3]
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.a = getelementptr inbounds i64, ptr addrspace(1) %aptr, i32 %tid
@@ -60,13 +63,14 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 2, v0
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
-; GFX10-NEXT: global_load_dword v4, v3, s[6:7]
+; GFX10-NEXT: global_load_dword v6, v3, s[6:7]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v0, v4, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v1, v4, v[0:1]
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v0, v6, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, v3
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v1, v6, v[4:5]
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
@@ -78,18 +82,21 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b64 v[0:1], v1, s[2:3]
-; GFX11-NEXT: global_load_b32 v5, v2, s[4:5]
+; GFX11-NEXT: global_load_b32 v8, v2, s[4:5]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, v5, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v1, v5, v[0:1]
+; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, v8, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v4, v3
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v1, v8, v[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v3, v6
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[0:1]
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -112,13 +119,14 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v4, v2, s[2:3]
+; GFX10-NEXT: global_load_dword v6, v2, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[6:7]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v4, v0, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v1, v[0:1]
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v6, v0, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, v3
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v6, v1, v[4:5]
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
@@ -130,18 +138,21 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v5, v1, s[2:3]
+; GFX11-NEXT: global_load_b32 v8, v1, s[2:3]
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[4:5]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v5, v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v5, v1, v[0:1]
+; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v8, v0, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v4, v3
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v8, v1, v[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v3, v6
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[0:1]
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -210,14 +221,15 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_dword v4, v2, s[2:3]
+; GFX10-NEXT: global_load_dword v6, v2, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[6:7]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v4, v0, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v1, v[0:1]
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v6, v0, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, v3
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v6, v1, v[4:5]
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
@@ -229,18 +241,21 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: global_load_b32 v5, v0, s[2:3]
+; GFX11-NEXT: global_load_b32 v8, v0, s[2:3]
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[4:5]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v5, v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v5, v1, v[0:1]
+; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v8, v0, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v4, v3
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v8, v1, v[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v3, v6
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[0:1]
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -390,16 +405,17 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[6:7]
; GFX10-NEXT: s_waitcnt vmcnt(1)
-; GFX10-NEXT: v_and_b32_e32 v6, 0xfff00000, v0
+; GFX10-NEXT: v_and_b32_e32 v0, 0xfff00000, v0
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s2, v6, v2, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, v5
-; GFX10-NEXT: v_mad_u64_u32 v[5:6], s2, v6, v3, v[0:1]
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], s2, v0, v2, 0
+; GFX10-NEXT: v_mov_b32_e32 v6, v5
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s2, v0, v3, v[6:7]
; GFX10-NEXT: v_and_b32_e32 v0, 0xf00f, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v0, v2, v[5:6]
; GFX10-NEXT: v_mov_b32_e32 v5, v0
@@ -413,6 +429,7 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -420,15 +437,15 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX11-NEXT: global_load_b64 v[0:1], v2, s[2:3]
; GFX11-NEXT: global_load_b64 v[2:3], v2, s[4:5]
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_and_b32_e32 v7, 0xfff00000, v0
+; GFX11-NEXT: v_and_b32_e32 v0, 0xfff00000, v0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v7, v2, 0
-; GFX11-NEXT: v_mov_b32_e32 v0, v5
+; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, 0
+; GFX11-NEXT: v_mov_b32_e32 v6, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v7, v3, v[0:1]
+; GFX11-NEXT: v_mad_u64_u32 v[8:9], null, v0, v3, v[6:7]
; GFX11-NEXT: v_and_b32_e32 v3, 0xf00f, v1
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[8:9]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: global_store_b64 v0, v[4:5], s[0:1]
@@ -510,7 +527,9 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX10-NEXT: ; %bb.1: ; %else
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, v2, v4, 0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s3, v2, v5, v[1:2]
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_mov_b32_e32 v3, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s3, v2, v5, v[3:4]
; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX10-NEXT: .LBB10_2: ; %Flow
@@ -547,11 +566,14 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX11-NEXT: ; %bb.1: ; %else
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v4, 0
+; GFX11-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[1:2]
-; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX11-NEXT: v_mov_b32_e32 v1, v3
+; GFX11-NEXT: v_mov_b32_e32 v3, v1
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v2, v5, v[3:4]
; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v1, v6
; GFX11-NEXT: .LBB10_2: ; %Flow
; GFX11-NEXT: s_and_not1_saveexec_b32 s2, s2
; GFX11-NEXT: s_cbranch_execz .LBB10_4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
index 637aaf7529364..b2517431f6fbf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -548,7 +548,9 @@ define i64 @v_mul_i64(i64 %num, i64 %den) {
; GCN-NEXT: v_mov_b32_e32 v4, v0
; GCN-NEXT: v_mov_b32_e32 v5, v1
; GCN-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
-; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[1:2]
+; GCN-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GCN-NEXT: v_mov_b32_e32 v6, v1
+; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[6:7]
; GCN-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -557,8 +559,10 @@ define i64 @v_mul_i64(i64 %num, i64 %den) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v4, v0
; GFX10-NEXT: v_mov_b32_e32 v5, v1
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v4, v3, v[1:2]
+; GFX10-NEXT: v_mov_b32_e32 v6, v1
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v4, v3, v[6:7]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v5, v2, v[3:4]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -567,9 +571,11 @@ define i64 @v_mul_i64(i64 %num, i64 %den) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v2
; GFX11-NEXT: v_mov_b32_e32 v6, v1
+; GFX11-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v5, 0
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v4, v3, v[1:2]
-; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v6, v5, v[7:8]
+; GFX11-NEXT: v_mov_b32_e32 v7, v1
+; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v4, v3, v[7:8]
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v6, v5, v[9:10]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_mul_i64:
@@ -3129,34 +3135,40 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b64 s[0:1], 0
-; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[0:3], 0 addr64
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x50
+; GFX7-NEXT: buffer_load_dword v6, v[2:3], s[0:3], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX7-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v5, 0
-; GFX7-NEXT: v_ashrrev_i32_e32 v4, 31, v4
-; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v5, v[3:4]
+; GFX7-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v7, 0
+; GFX7-NEXT: v_ashrrev_i32_e32 v6, 31, v6
+; GFX7-NEXT: v_mov_b32_e32 v4, v3
+; GFX7-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[4:5]
; GFX7-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_mul_u64_sext_with_vregs:
; GFX8: ; %bb.0:
-; GFX8-NEXT: flat_load_dword v4, v[2:3]
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x50
+; GFX8-NEXT: flat_load_dword v6, v[2:3]
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX8-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v4, v5, 0
-; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v4
-; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v4, v5, v[3:4]
+; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v6, v7, 0
+; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v6
+; GFX8-NEXT: v_mov_b32_e32 v4, v3
+; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v6, v7, v[4:5]
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_mul_u64_sext_with_vregs:
; GFX9: ; %bb.0:
-; GFX9-NEXT: global_load_dword v4, v[2:3], off
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x50
+; GFX9-NEXT: global_load_dword v6, v[2:3], off
+; GFX9-NEXT: v_mov_b32_e32 v7, 0x50
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v4, v5, 0
-; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v4
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v4, v5, v[3:4]
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v6, v7, 0
+; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v6
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v6, v7, v[4:5]
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: s_endpgm
;
@@ -3165,8 +3177,10 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
; GFX10-NEXT: global_load_dword v4, v[2:3], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, 0x50, v4, 0
-; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v4
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, 0x50, v4, v[3:4]
+; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v4
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX10-NEXT: v_mov_b32_e32 v4, v3
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, 0x50, v6, v[4:5]
; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX10-NEXT: s_endpgm
;
@@ -3175,9 +3189,11 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
; GFX11-NEXT: global_load_b32 v4, v[2:3], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0x50, v4, 0
-; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v4
-; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, 0x50, v6, v[3:4]
-; GFX11-NEXT: v_mov_b32_e32 v3, v4
+; GFX11-NEXT: v_ashrrev_i32_e32 v8, 31, v4
+; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-NEXT: v_mov_b32_e32 v4, v3
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, 0x50, v8, v[4:5]
+; GFX11-NEXT: v_mov_b32_e32 v3, v6
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index f57fc005b994b..5275ba3fd7bcc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -39,72 +39,74 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3
; CHECK-NEXT: v_trunc_f32_e32 v8, v6
; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8
-; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v8
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
-; CHECK-NEXT: v_mov_b32_e32 v3, v7
-; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
-; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6
-; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; CHECK-NEXT: v_mul_hi_u32 v8, v9, v6
+; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v3, 0
+; CHECK-NEXT: v_mov_b32_e32 v8, v7
+; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[8:9]
+; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6
+; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v3, v[7:8]
+; CHECK-NEXT: v_mul_lo_u32 v8, v12, v6
; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
-; CHECK-NEXT: v_mul_lo_u32 v13, v9, v7
+; CHECK-NEXT: v_mul_lo_u32 v13, v3, v7
; CHECK-NEXT: v_mul_lo_u32 v14, v12, v7
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v13
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; CHECK-NEXT: v_mul_hi_u32 v8, v9, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v13, v3
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_mul_hi_u32 v9, v3, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v14, v6
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
+; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8
-; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v3
-; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v6, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
-; CHECK-NEXT: v_mov_b32_e32 v3, v7
-; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
-; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v5
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v10
-; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v10, vcc
-; CHECK-NEXT: v_xor_b32_e32 v8, v3, v10
-; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6
-; CHECK-NEXT: v_mul_lo_u32 v5, v9, v7
-; CHECK-NEXT: v_xor_b32_e32 v11, v4, v10
-; CHECK-NEXT: v_mul_hi_u32 v4, v9, v6
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v7, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v3, 0
+; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
+; CHECK-NEXT: v_mov_b32_e32 v8, v7
+; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[8:9]
+; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v3, v[7:8]
+; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; CHECK-NEXT: v_xor_b32_e32 v8, v4, v9
+; CHECK-NEXT: v_mul_lo_u32 v4, v12, v6
+; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7
+; CHECK-NEXT: v_xor_b32_e32 v11, v5, v9
+; CHECK-NEXT: v_mul_hi_u32 v5, v3, v6
; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v4, v12, v7
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v5, v9, v7
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_mul_hi_u32 v6, v12, v7
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_mul_lo_u32 v5, v12, v7
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CHECK-NEXT: v_mul_hi_u32 v10, v3, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v11, v3
; CHECK-NEXT: v_mul_lo_u32 v6, v8, v4
; CHECK-NEXT: v_mul_hi_u32 v7, v8, v3
; CHECK-NEXT: v_mul_hi_u32 v3, v11, v3
-; CHECK-NEXT: v_mul_hi_u32 v9, v11, v4
+; CHECK-NEXT: v_mul_hi_u32 v10, v11, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
@@ -121,38 +123,40 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v7, 0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v5
-; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
+; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v5
+; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
+; CHECK-NEXT: v_mov_b32_e32 v5, v4
+; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v10, v[5:6]
; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v7, v[4:5]
; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v4, vcc
; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v11, v4
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5]
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v7
-; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[4:5]
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v7
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v8
-; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v9, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v6
+; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
+; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3
; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
@@ -220,60 +224,64 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v0
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v2
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
-; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
-; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT: v_mul_lo_u32 v6, v3, v1
-; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1
-; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1
-; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
+; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v0
+; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v4, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v5, v[2:3]
+; CHECK-NEXT: v_mul_hi_u32 v3, v4, v0
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v4, v[1:2]
+; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
+; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1
+; CHECK-NEXT: v_mul_lo_u32 v7, v5, v1
+; CHECK-NEXT: v_mul_hi_u32 v8, v4, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
-; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
-; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v0
+; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v4, 0
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v5, v[2:3]
+; CHECK-NEXT: v_mul_hi_u32 v6, v4, v0
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v4, v[1:2]
+; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1
+; CHECK-NEXT: v_mul_lo_u32 v6, v5, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, s13, v0
; CHECK-NEXT: v_mul_lo_u32 v3, s12, v1
; CHECK-NEXT: v_mul_hi_u32 v4, s12, v0
@@ -295,12 +303,14 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2]
-; CHECK-NEXT: v_mov_b32_e32 v5, s13
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s12, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v5, v[2:3]
+; CHECK-NEXT: v_mov_b32_e32 v5, s13
; CHECK-NEXT: v_mov_b32_e32 v3, s11
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s13, v1
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v2
@@ -383,6 +393,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v10
; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v4
; GISEL-NEXT: v_sub_i32_e32 v15, vcc, 0, v10
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
; GISEL-NEXT: v_subb_u32_e32 v16, vcc, 0, v4, vcc
; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v9
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
@@ -390,184 +401,188 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v5
; GISEL-NEXT: v_trunc_f32_e32 v9, v9
; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v5
+; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0
-; GISEL-NEXT: v_mov_b32_e32 v5, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v9, v11
-; GISEL-NEXT: v_mul_hi_u32 v17, v14, v11
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13]
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v5, 0
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[13:14]
+; GISEL-NEXT: v_mul_hi_u32 v17, v5, v11
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v5, v[12:13]
+; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11
-; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13
+; GISEL-NEXT: v_mul_lo_u32 v14, v5, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v17, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_hi_u32 v14, v5, v12
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
+; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13
-; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v5
-; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v11, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v12, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v5, 0
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_mov_b32_e32 v5, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[5:6]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[13:14]
; GISEL-NEXT: v_xor_b32_e32 v15, v0, v9
; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11
-; GISEL-NEXT: v_mul_lo_u32 v5, v14, v12
-; GISEL-NEXT: v_xor_b32_e32 v16, v1, v9
-; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v5, v[12:13]
+; GISEL-NEXT: v_mul_hi_u32 v14, v5, v11
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v5, v12
; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9
+; GISEL-NEXT: v_xor_b32_e32 v8, v9, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v5, v14, v12
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
+; GISEL-NEXT: v_mul_lo_u32 v14, v17, v12
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT: v_mul_hi_u32 v13, v5, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5
-; GISEL-NEXT: v_mul_hi_u32 v11, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v16, v0
-; GISEL-NEXT: v_mul_lo_u32 v11, v15, v1
-; GISEL-NEXT: v_mul_hi_u32 v12, v15, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0
-; GISEL-NEXT: v_xor_b32_e32 v8, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v17, v11, vcc
+; GISEL-NEXT: v_mul_lo_u32 v11, v1, v0
+; GISEL-NEXT: v_mul_lo_u32 v12, v15, v5
+; GISEL-NEXT: v_mul_hi_u32 v13, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v1, v0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v12, v16, v1
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5
-; GISEL-NEXT: v_mul_hi_u32 v11, v15, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v1, v5
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v12, v15, v5
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v12
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1
+; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v0, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v1, v5
-; GISEL-NEXT: v_mov_b32_e32 v1, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v10, v14, v[1:2]
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v5, v13
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v5
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v5, vcc
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v10, v16, v[13:14]
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5
; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v4, v0, v[12:13]
-; GISEL-NEXT: v_xor_b32_e32 v7, v1, v5
-; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5
-; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7
-; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v5, vcc
+; GISEL-NEXT: v_xor_b32_e32 v7, v6, v5
+; GISEL-NEXT: v_xor_b32_e32 v6, v13, v5
+; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v7
+; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v6
; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v15, v11
-; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v16, v12
-; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v16, v12, vcc
-; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v11, v4, vcc
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
-; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v1
-; GISEL-NEXT: v_trunc_f32_e32 v16, v11
-; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v16
-; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v1
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v1, v12, vcc
+; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v12
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
+; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v11
+; GISEL-NEXT: v_trunc_f32_e32 v13, v12
+; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13
+; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v11
; GISEL-NEXT: v_sub_i32_e32 v19, vcc, 0, v7
; GISEL-NEXT: v_subb_u32_e32 v20, vcc, 0, v6, vcc
; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v19, v18, 0
-; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v10
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v10
-; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v13, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
-; GISEL-NEXT: v_mov_b32_e32 v1, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v16, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v15, v4
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v15, v10
+; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v13
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v14, v10
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_mul_lo_u32 v10, v15, v11
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v15, v[13:14]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v4
-; GISEL-NEXT: v_mul_lo_u32 v10, v18, v12
-; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v18, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
-; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11
-; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v15, v4
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v12
+; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v18, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT: v_mul_hi_u32 v11, v15, v11
+; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v4
; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v13, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v15, v21, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9]
; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v10
-; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v15, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v17, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v21, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v13, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v17, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v21, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
-; GISEL-NEXT: v_mul_lo_u32 v13, v16, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT: v_mul_hi_u32 v13, v18, v12
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v21, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_mul_hi_u32 v14, v18, v12
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_mul_hi_u32 v12, v16, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13
+; GISEL-NEXT: v_mul_hi_u32 v12, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v10
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v16, v11, vcc
+; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v11, vcc
; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v19, v12, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
; GISEL-NEXT: v_mov_b32_e32 v0, v11
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v13, v[0:1]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v14, v[0:1]
; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
+; GISEL-NEXT: v_xor_b32_e32 v9, v4, v8
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v12, v[0:1]
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v11
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v11, vcc
; GISEL-NEXT: v_xor_b32_e32 v15, v1, v11
-; GISEL-NEXT: v_mul_lo_u32 v1, v13, v10
+; GISEL-NEXT: v_mul_lo_u32 v1, v14, v10
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
; GISEL-NEXT: v_xor_b32_e32 v16, v2, v11
; GISEL-NEXT: v_mul_hi_u32 v2, v12, v10
-; GISEL-NEXT: v_xor_b32_e32 v9, v4, v8
+; GISEL-NEXT: v_mul_hi_u32 v4, v14, v10
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v13, v10
+; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -575,18 +590,18 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v13, v0, vcc
+; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v14, v0, vcc
; GISEL-NEXT: v_mul_lo_u32 v2, v16, v1
; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0
; GISEL-NEXT: v_mul_hi_u32 v4, v15, v1
; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1
-; GISEL-NEXT: v_xor_b32_e32 v10, v14, v8
+; GISEL-NEXT: v_xor_b32_e32 v10, v13, v8
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -605,6 +620,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v1
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v8
@@ -667,100 +683,106 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_xor_b32_e32 v1, v3, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v1
-; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v2
-; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v15, vcc, 0, v2
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_subb_u32_e32 v16, vcc, 0, v1, vcc
; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
; CGP-NEXT: v_trunc_f32_e32 v5, v4
; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v12, v3
-; CGP-NEXT: v_cvt_u32_f32_e32 v15, v5
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5]
-; CGP-NEXT: v_mul_hi_u32 v16, v12, v3
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v5, v15, v3
-; CGP-NEXT: v_mul_hi_u32 v3, v15, v3
-; CGP-NEXT: v_mul_lo_u32 v17, v12, v4
-; CGP-NEXT: v_mul_lo_u32 v18, v15, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16
-; CGP-NEXT: v_mul_hi_u32 v16, v12, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v14, v3
+; CGP-NEXT: v_cvt_u32_f32_e32 v17, v5
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v15, v14, 0
+; CGP-NEXT: v_mov_b32_e32 v12, v4
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v17, v[12:13]
+; CGP-NEXT: v_mul_hi_u32 v12, v14, v3
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v16, v14, v[4:5]
+; CGP-NEXT: v_mul_lo_u32 v5, v17, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v17, v3
+; CGP-NEXT: v_mul_lo_u32 v13, v14, v4
+; CGP-NEXT: v_mul_lo_u32 v18, v17, v4
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; CGP-NEXT: v_mul_hi_u32 v12, v14, v4
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v13, v5
; CGP-NEXT: v_add_i32_e32 v3, vcc, v18, v3
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; CGP-NEXT: v_mul_hi_u32 v4, v15, v4
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT: v_mul_hi_u32 v4, v17, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v3
-; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v4, vcc
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5]
-; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11
-; CGP-NEXT: v_mul_hi_u32 v16, v12, v3
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5]
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v13
-; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v13, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v5, v13
-; CGP-NEXT: v_mul_lo_u32 v5, v15, v3
-; CGP-NEXT: v_mul_lo_u32 v14, v12, v4
-; CGP-NEXT: v_mul_hi_u32 v3, v15, v3
-; CGP-NEXT: v_xor_b32_e32 v10, v10, v13
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v3
+; CGP-NEXT: v_addc_u32_e32 v17, vcc, v17, v4, vcc
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v15, v14, 0
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_mov_b32_e32 v12, v4
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v15, v17, v[12:13]
+; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v16, v14, v[4:5]
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v12
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v12, vcc
+; CGP-NEXT: v_xor_b32_e32 v13, v5, v12
+; CGP-NEXT: v_mul_lo_u32 v5, v17, v3
+; CGP-NEXT: v_mul_lo_u32 v11, v14, v4
+; CGP-NEXT: v_xor_b32_e32 v15, v10, v12
+; CGP-NEXT: v_mul_hi_u32 v10, v14, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v17, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v16, v15, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT: v_mul_hi_u32 v14, v12, v4
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT: v_mul_hi_u32 v4, v15, v4
+; CGP-NEXT: v_mul_lo_u32 v10, v17, v4
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
+; CGP-NEXT: v_mul_hi_u32 v11, v14, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT: v_mul_hi_u32 v4, v17, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3
-; CGP-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc
-; CGP-NEXT: v_mul_lo_u32 v5, v10, v3
-; CGP-NEXT: v_mul_lo_u32 v12, v11, v4
-; CGP-NEXT: v_mul_hi_u32 v14, v11, v3
-; CGP-NEXT: v_mul_hi_u32 v3, v10, v3
-; CGP-NEXT: v_mul_hi_u32 v15, v10, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v14, v10, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT: v_mul_hi_u32 v12, v11, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
+; CGP-NEXT: v_addc_u32_e32 v4, vcc, v17, v4, vcc
+; CGP-NEXT: v_mul_lo_u32 v5, v15, v3
+; CGP-NEXT: v_mul_lo_u32 v10, v13, v4
+; CGP-NEXT: v_mul_hi_u32 v11, v13, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v15, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v15, v4
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT: v_mul_hi_u32 v10, v13, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v3
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; CGP-NEXT: v_add_i32_e32 v14, vcc, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v11, v15, v4
; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v14, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v5
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v12, v[4:5]
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v11, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT: v_add_i32_e32 v16, vcc, v11, v5
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v13, v3
+; CGP-NEXT: v_mov_b32_e32 v10, v4
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v16, v[10:11]
; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v14, v[4:5]
-; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v10, v4, vcc
-; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v10, v4
+; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v15, v4, vcc
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v15, v4
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1
; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
@@ -771,13 +793,13 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v11, s[4:5]
; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v14
-; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v16, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
-; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v2, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v2, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v10
; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
@@ -785,8 +807,8 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc
-; CGP-NEXT: v_xor_b32_e32 v3, v13, v0
-; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc
+; CGP-NEXT: v_xor_b32_e32 v3, v12, v0
+; CGP-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v1, v3
; CGP-NEXT: v_xor_b32_e32 v1, v2, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
@@ -840,100 +862,106 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_xor_b32_e32 v3, v5, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v5, v4
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v4
-; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v4
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v3, vcc
; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
; CGP-NEXT: v_trunc_f32_e32 v7, v6
; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v10, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v13, v7
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7]
-; CGP-NEXT: v_mul_hi_u32 v14, v10, v5
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7]
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
-; CGP-NEXT: v_mul_lo_u32 v15, v10, v6
-; CGP-NEXT: v_mul_lo_u32 v16, v13, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT: v_mul_hi_u32 v14, v10, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v12, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v15, v7
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; CGP-NEXT: v_mov_b32_e32 v10, v6
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[10:11]
+; CGP-NEXT: v_mul_hi_u32 v10, v12, v5
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
+; CGP-NEXT: v_mul_lo_u32 v7, v15, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v15, v5
+; CGP-NEXT: v_mul_lo_u32 v11, v12, v6
+; CGP-NEXT: v_mul_lo_u32 v16, v15, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_hi_u32 v10, v12, v6
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT: v_mul_hi_u32 v6, v13, v6
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v6, v15, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v5
-; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v6, vcc
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7]
-; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9
-; CGP-NEXT: v_mul_hi_u32 v14, v10, v5
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7]
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v11
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc
-; CGP-NEXT: v_xor_b32_e32 v9, v7, v11
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v5
-; CGP-NEXT: v_mul_lo_u32 v12, v10, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
-; CGP-NEXT: v_xor_b32_e32 v8, v8, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v5
+; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v6, vcc
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_mov_b32_e32 v10, v6
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[10:11]
+; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v10
+; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc
+; CGP-NEXT: v_xor_b32_e32 v9, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v7, v15, v5
+; CGP-NEXT: v_mul_lo_u32 v11, v12, v6
+; CGP-NEXT: v_xor_b32_e32 v13, v8, v10
+; CGP-NEXT: v_mul_hi_u32 v8, v12, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v15, v5
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v14, v13, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT: v_mul_hi_u32 v12, v10, v6
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT: v_mul_hi_u32 v6, v13, v6
+; CGP-NEXT: v_mul_lo_u32 v8, v15, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v12, v6
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_mul_hi_u32 v6, v15, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v8, v5
-; CGP-NEXT: v_mul_lo_u32 v10, v9, v6
-; CGP-NEXT: v_mul_hi_u32 v12, v9, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v8, v5
-; CGP-NEXT: v_mul_hi_u32 v13, v8, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v8, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7
-; CGP-NEXT: v_mul_hi_u32 v10, v9, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v5, v7
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v12, 0
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v15, v6, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v13, v5
+; CGP-NEXT: v_mul_lo_u32 v8, v9, v6
+; CGP-NEXT: v_mul_hi_u32 v11, v9, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
+; CGP-NEXT: v_mul_hi_u32 v12, v13, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v7
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[6:7]
+; CGP-NEXT: v_mul_lo_u32 v11, v13, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_mul_hi_u32 v8, v9, v6
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v5, v7
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v11, 0
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v7
+; CGP-NEXT: ; implicit-def: $vgpr7_vgpr8
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v9, v5
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[6:7]
-; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v8, v6, vcc
-; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v8, v6
+; CGP-NEXT: v_mov_b32_e32 v7, v6
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v12, v[7:8]
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v11, v[6:7]
+; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v13, v6, vcc
+; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3
; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
@@ -943,8 +971,8 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3
; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
; CGP-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v11
+; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4
@@ -957,9 +985,9 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v5, v11, v2
-; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v5, v10, v2
+; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v2, v3, v5
; CGP-NEXT: v_xor_b32_e32 v3, v4, v5
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
@@ -1049,77 +1077,81 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; CHECK-NEXT: v_mov_b32_e32 v7, 0xffed2705
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; CHECK-NEXT: v_trunc_f32_e32 v4, v3
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3
-; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3
-; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v2
+; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v4
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_mul_hi_u32 v5, v6, v2
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_mul_lo_u32 v4, v8, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3
+; CHECK-NEXT: v_mul_lo_u32 v10, v8, v3
+; CHECK-NEXT: v_mul_hi_u32 v11, v6, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
-; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6
-; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3
-; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6
-; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v2
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v3, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT: v_xor_b32_e32 v4, v0, v5
+; CHECK-NEXT: v_mul_lo_u32 v0, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, v6, v3
+; CHECK-NEXT: v_xor_b32_e32 v9, v1, v5
+; CHECK-NEXT: v_mul_hi_u32 v1, v6, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
+; CHECK-NEXT: v_mul_lo_u32 v1, v8, v3
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT: v_mul_hi_u32 v7, v6, v3
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0
; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0
-; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
@@ -1134,39 +1166,41 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2]
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[2:3]
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5]
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7
-; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v3, s[4:5]
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v7
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3
+; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v4, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6
-; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT: v_xor_b32_e32 v0, v0, v5
+; CHECK-NEXT: v_xor_b32_e32 v1, v1, v5
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = sdiv i64 %num, 1235195
ret i64 %result
@@ -1179,6 +1213,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
@@ -1215,42 +1250,43 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v14
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_mov_b32_e32 v15, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; GISEL-NEXT: v_mul_lo_u32 v9, v17, v13
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v4, v[14:15]
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14
+; GISEL-NEXT: v_mul_lo_u32 v15, v4, v14
; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13
-; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT: v_mul_hi_u32 v15, v4, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v15, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v4, v14
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0
; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -1275,8 +1311,10 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v13
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2]
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v18, v0
+; GISEL-NEXT: v_mov_b32_e32 v13, v1
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[13:14]
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc
; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
@@ -1291,8 +1329,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
-; GISEL-NEXT: v_mov_b32_e32 v0, v5
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v0, v5
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
@@ -1318,72 +1357,76 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9
-; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v15, v13, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
-; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
-; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
-; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc
+; GISEL-NEXT: v_xor_b32_e32 v13, v7, v9
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v16, v5, vcc
+; GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, v[5:6]
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v7
+; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5
+; GISEL-NEXT: v_xor_b32_e32 v1, v12, v9
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v7
+; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5
+; GISEL-NEXT: v_mul_lo_u32 v3, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
-; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v6, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[5:6]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
@@ -1391,24 +1434,24 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
@@ -1417,6 +1460,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; CGP-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
@@ -1450,39 +1494,40 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; CGP-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; CGP-NEXT: ; implicit-def: $vgpr15_vgpr16
; CGP-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; CGP-NEXT: v_mov_b32_e32 v4, v14
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v4, v17, v13
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT: v_mul_lo_u32 v9, v16, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v16, v13
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_hi_u32 v9, v17, v13
-; CGP-NEXT: v_mul_lo_u32 v13, v17, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v15, v16, v14
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; CGP-NEXT: v_mov_b32_e32 v15, v14
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; CGP-NEXT: v_mul_lo_u32 v9, v17, v13
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v4, v[14:15]
+; CGP-NEXT: v_mul_lo_u32 v15, v4, v14
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT: v_mul_hi_u32 v15, v4, v13
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v15, v17, v14
+; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v4, v14
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; CGP-NEXT: v_xor_b32_e32 v18, v0, v9
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v17, v14
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v14
; CGP-NEXT: v_xor_b32_e32 v19, v1, v9
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v19, v0
; CGP-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -1507,115 +1552,122 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v13
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2]
+; CGP-NEXT: ; implicit-def: $vgpr13_vgpr14
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v18, v0
+; CGP-NEXT: v_mov_b32_e32 v13, v1
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[13:14]
; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc
; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT: v_add_i32_e32 v17, vcc, 1, v15
-; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v16, vcc
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_mov_b32_e32 v0, v5
+; CGP-NEXT: v_sub_i32_e32 v13, vcc, v0, v4
+; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
+; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v0, v5
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v15
+; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v16, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
-; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v17
-; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_mul_lo_u32 v5, v7, v0
-; CGP-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v5
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; CGP-NEXT: v_cndmask_b32_e32 v13, -1, v13, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v5
+; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v18, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: v_mul_lo_u32 v13, v7, v0
+; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v17, v18, v17, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v13
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v10, v8, v0
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; CGP-NEXT: v_mul_hi_u32 v5, v7, v0
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT: v_mul_lo_u32 v11, v8, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; CGP-NEXT: v_mul_hi_u32 v10, v7, v0
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v17, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v5, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v13, vcc
-; CGP-NEXT: v_xor_b32_e32 v1, v10, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6]
-; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT: v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT: v_mul_lo_u32 v2, v8, v0
-; CGP-NEXT: v_mul_lo_u32 v6, v7, v5
-; CGP-NEXT: v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT: v_mul_hi_u32 v3, v7, v0
-; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT: ; implicit-def: $vgpr7_vgpr8
+; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc
+; CGP-NEXT: v_mov_b32_e32 v7, v1
+; CGP-NEXT: v_xor_b32_e32 v13, v5, v9
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_cndmask_b32_e32 v12, v16, v17, vcc
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v10, v[5:6]
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT: v_xor_b32_e32 v8, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
+; CGP-NEXT: v_mul_lo_u32 v6, v10, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v12, v9
+; CGP-NEXT: v_xor_b32_e32 v12, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v3, v8, v5
+; CGP-NEXT: v_mul_lo_u32 v3, v11, v5
+; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT: v_mul_hi_u32 v6, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v10, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v8, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v11, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; CGP-NEXT: v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT: v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v9
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; CGP-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; CGP-NEXT: v_mul_lo_u32 v5, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v6, v8, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CGP-NEXT: v_mul_hi_u32 v9, v8, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
+; CGP-NEXT: v_mul_lo_u32 v9, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v3, v12, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v6, v8, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v13, v2
-; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v10, v12, v2
+; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v9, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v5
+; CGP-NEXT: ; implicit-def: $vgpr5_vgpr6
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT: v_mov_b32_e32 v5, v3
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[5:6]
+; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
@@ -1623,24 +1675,24 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6
-; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
@@ -1676,126 +1728,132 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-NEXT: v_xor_b32_e32 v1, v5, v0
; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v2
; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1
-; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v2
-; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc
+; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v2
+; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v1, vcc
; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5
; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
; CHECK-NEXT: v_trunc_f32_e32 v7, v6
; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7
-; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
-; CHECK-NEXT: v_mul_hi_u32 v12, v8, v5
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT: v_mul_lo_u32 v7, v11, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT: v_mul_lo_u32 v13, v8, v6
-; CHECK-NEXT: v_mul_lo_u32 v14, v11, v6
+; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v5
+; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v7
+; CHECK-NEXT: ; implicit-def: $vgpr7_vgpr8
+; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v9, 0
+; CHECK-NEXT: v_mov_b32_e32 v7, v6
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v12, v[7:8]
+; CHECK-NEXT: v_mul_hi_u32 v8, v9, v5
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v9, v[6:7]
+; CHECK-NEXT: v_mul_lo_u32 v7, v12, v5
+; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5
+; CHECK-NEXT: v_mul_lo_u32 v13, v9, v6
+; CHECK-NEXT: v_mul_lo_u32 v14, v12, v6
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT: v_mul_hi_u32 v12, v8, v6
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT: v_mul_hi_u32 v8, v9, v6
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v7
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v14, v5
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12
-; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8
+; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v5
-; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v6, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
-; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc
-; CHECK-NEXT: v_xor_b32_e32 v7, v3, v9
-; CHECK-NEXT: v_mul_lo_u32 v3, v11, v5
-; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6
-; CHECK-NEXT: v_xor_b32_e32 v12, v4, v9
-; CHECK-NEXT: v_mul_hi_u32 v4, v8, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v5
+; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v6, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v9, 0
+; CHECK-NEXT: ; implicit-def: $vgpr7_vgpr8
+; CHECK-NEXT: v_mov_b32_e32 v7, v6
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v12, v[7:8]
+; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v9, v[6:7]
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT: v_xor_b32_e32 v7, v3, v8
+; CHECK-NEXT: v_mul_lo_u32 v3, v12, v5
+; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6
+; CHECK-NEXT: v_xor_b32_e32 v11, v4, v8
+; CHECK-NEXT: v_mul_hi_u32 v4, v9, v5
+; CHECK-NEXT: v_mul_hi_u32 v5, v12, v5
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6
+; CHECK-NEXT: v_mul_lo_u32 v4, v12, v6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3
-; CHECK-NEXT: v_mul_hi_u32 v10, v8, v6
+; CHECK-NEXT: v_mul_hi_u32 v10, v9, v6
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10
-; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
+; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v11, v4, vcc
-; CHECK-NEXT: v_mul_lo_u32 v5, v12, v3
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc
+; CHECK-NEXT: v_mul_lo_u32 v5, v11, v3
; CHECK-NEXT: v_mul_lo_u32 v6, v7, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, v7, v3
-; CHECK-NEXT: v_mul_hi_u32 v3, v12, v3
-; CHECK-NEXT: v_mul_hi_u32 v10, v12, v4
+; CHECK-NEXT: v_mul_hi_u32 v9, v7, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v11, v3
+; CHECK-NEXT: v_mul_hi_u32 v10, v11, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v8, v12, v4
+; CHECK-NEXT: v_mul_lo_u32 v9, v11, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v7, v4
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v3, v5
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v8, 0
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v3, v5
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v9, 0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v5
-; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
+; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v5
+; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, v[4:5]
-; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v12, v4, vcc
-; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4
+; CHECK-NEXT: v_mov_b32_e32 v5, v4
+; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v10, v[5:6]
+; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v9, v[4:5]
+; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v4, vcc
+; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v11, v4
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v10, s[4:5]
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v8
-; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v7, s[4:5]
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v9
+; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v7
-; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v10, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v6
+; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v7, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
+; CHECK-NEXT: v_xor_b32_e32 v3, v8, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3
; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
@@ -1856,182 +1914,188 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
; GISEL-NEXT: v_trunc_f32_e32 v13, v11
; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v13
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v7
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v13
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0
-; GISEL-NEXT: v_mov_b32_e32 v7, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[7:8]
-; GISEL-NEXT: v_mul_lo_u32 v7, v17, v11
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13]
-; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v14, v11
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[13:14]
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v7, v[12:13]
+; GISEL-NEXT: v_mul_lo_u32 v13, v17, v11
+; GISEL-NEXT: v_mul_lo_u32 v14, v7, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v7, v11
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v13, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v18, v7
-; GISEL-NEXT: v_mul_hi_u32 v18, v14, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v14, v17, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
+; GISEL-NEXT: v_mul_hi_u32 v18, v7, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18
; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v7
-; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v11, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0
-; GISEL-NEXT: v_mov_b32_e32 v7, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[7:8]
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v7, v11
+; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v12, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v18, 0
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[13:14]
; GISEL-NEXT: v_xor_b32_e32 v15, v0, v7
; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11
-; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12
-; GISEL-NEXT: v_xor_b32_e32 v16, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v18, v[12:13]
+; GISEL-NEXT: v_mul_hi_u32 v14, v18, v11
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v12
; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11
+; GISEL-NEXT: v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12
+; GISEL-NEXT: v_mul_lo_u32 v14, v17, v12
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0
-; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v13, v18, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v16, v0
-; GISEL-NEXT: v_mul_lo_u32 v12, v15, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v15, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0
-; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v13, v16, v1
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_mul_hi_u32 v12, v15, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v18, v0
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v17, v11, vcc
+; GISEL-NEXT: v_mul_lo_u32 v12, v1, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v15, v11
+; GISEL-NEXT: v_mul_hi_u32 v14, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v1, v0
+; GISEL-NEXT: v_mul_hi_u32 v16, v1, v11
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v12
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1
+; GISEL-NEXT: v_mul_lo_u32 v14, v1, v11
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT: v_mul_hi_u32 v13, v15, v11
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v8, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v1, v13
-; GISEL-NEXT: v_mov_b32_e32 v1, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v8, v14, v[1:2]
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v6
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v10, v6, vcc
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v13
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v6
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v8, v16, v[13:14]
; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v5, v0, v[12:13]
-; GISEL-NEXT: v_xor_b32_e32 v10, v1, v6
-; GISEL-NEXT: v_xor_b32_e32 v9, v9, v6
-; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v10
-; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v9
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v10, v6, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v9, v6
+; GISEL-NEXT: v_xor_b32_e32 v9, v13, v6
+; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v10
+; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v9
; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v15, v11
-; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v16, v12
-; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v16, v12, vcc
-; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v11, v5, vcc
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
-; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v1
-; GISEL-NEXT: v_trunc_f32_e32 v16, v11
-; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v16
-; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v1
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v1, v12, vcc
+; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v12
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
+; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v11
+; GISEL-NEXT: v_trunc_f32_e32 v13, v12
+; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v13
+; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v11
; GISEL-NEXT: v_sub_i32_e32 v19, vcc, 0, v10
; GISEL-NEXT: v_subb_u32_e32 v20, vcc, 0, v9, vcc
; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v19, v18, 0
-; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v8
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v8
-; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v13, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8
-; GISEL-NEXT: v_mov_b32_e32 v1, v12
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v16, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v15, v5
-; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v15, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v13
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v14, v8
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_mul_lo_u32 v8, v15, v11
+; GISEL-NEXT: v_mov_b32_e32 v13, v12
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v15, v[13:14]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v5
-; GISEL-NEXT: v_mul_lo_u32 v8, v18, v12
-; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v18, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
-; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11
-; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v15, v5
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v12
+; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v18, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT: v_mul_hi_u32 v11, v15, v11
+; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[8:9]
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v8, v13, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v15, v21, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9]
; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v8
-; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v15, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v17, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v21, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT: v_mul_lo_u32 v15, v16, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11
-; GISEL-NEXT: v_mul_hi_u32 v15, v18, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v21, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v13, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v8, v17, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v13, v21, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_mul_hi_u32 v14, v18, v12
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_mul_hi_u32 v12, v16, v12
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15
+; GISEL-NEXT: v_mul_hi_u32 v12, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v11
-; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v16, v12, vcc
+; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v12, vcc
; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v19, v13, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v0, v5, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
; GISEL-NEXT: v_mov_b32_e32 v0, v12
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v15, v[0:1]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v14, v[0:1]
; GISEL-NEXT: v_ashrrev_i32_e32 v12, 31, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v14, v8, vcc
+; GISEL-NEXT: v_mul_hi_u32 v4, v14, v11
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v13, v[0:1]
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v12
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v12, vcc
-; GISEL-NEXT: v_xor_b32_e32 v14, v1, v12
-; GISEL-NEXT: v_mul_lo_u32 v1, v15, v11
+; GISEL-NEXT: v_xor_b32_e32 v15, v1, v12
+; GISEL-NEXT: v_mul_lo_u32 v1, v14, v11
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0
; GISEL-NEXT: v_xor_b32_e32 v16, v2, v12
; GISEL-NEXT: v_mul_hi_u32 v2, v13, v11
-; GISEL-NEXT: v_mul_hi_u32 v4, v15, v11
+; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v2, v15, v0
+; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GISEL-NEXT: v_mul_hi_u32 v3, v13, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -2039,25 +2103,25 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v15, v0, vcc
+; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v14, v0, vcc
; GISEL-NEXT: v_mul_lo_u32 v2, v16, v1
-; GISEL-NEXT: v_mul_lo_u32 v3, v14, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v14, v1
+; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v4, v15, v1
; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1
-; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7
+; GISEL-NEXT: v_xor_b32_e32 v8, v8, v7
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v4, v16, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v14, v0
+; GISEL-NEXT: v_mul_hi_u32 v3, v15, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
@@ -2069,13 +2133,13 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v1
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v13, v[0:1]
-; GISEL-NEXT: v_xor_b32_e32 v8, v8, v7
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v7
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v11, v[3:4]
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v7, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v14, v2
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v15, v2
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v16, v3, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v16, v3
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9
@@ -2135,126 +2199,132 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_xor_b32_e32 v1, v10, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4
; CGP-NEXT: v_cvt_f32_u32_e32 v11, v1
-; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v4
-; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v15, vcc, 0, v4
+; CGP-NEXT: v_subb_u32_e32 v16, vcc, 0, v1, vcc
; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11
; CGP-NEXT: v_rcp_iflag_f32_e32 v10, v10
; CGP-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10
; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10
; CGP-NEXT: v_trunc_f32_e32 v12, v11
; CGP-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12
-; CGP-NEXT: v_cvt_u32_f32_e32 v13, v10
-; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12
-; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12]
-; CGP-NEXT: v_mul_hi_u32 v17, v13, v10
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT: v_mul_lo_u32 v12, v16, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT: v_mul_lo_u32 v18, v13, v11
-; CGP-NEXT: v_mul_lo_u32 v19, v16, v11
+; CGP-NEXT: v_cvt_u32_f32_e32 v14, v10
+; CGP-NEXT: v_cvt_u32_f32_e32 v17, v12
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0
+; CGP-NEXT: v_mov_b32_e32 v12, v11
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[12:13]
+; CGP-NEXT: v_mul_hi_u32 v13, v14, v10
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12]
+; CGP-NEXT: v_mul_lo_u32 v12, v17, v10
+; CGP-NEXT: v_mul_hi_u32 v10, v17, v10
+; CGP-NEXT: v_mul_lo_u32 v18, v14, v11
+; CGP-NEXT: v_mul_lo_u32 v19, v17, v11
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
-; CGP-NEXT: v_mul_hi_u32 v17, v13, v11
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT: v_mul_hi_u32 v13, v14, v11
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v12, vcc, v18, v12
; CGP-NEXT: v_add_i32_e32 v10, vcc, v19, v10
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v17
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17
-; CGP-NEXT: v_mul_hi_u32 v11, v16, v11
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13
+; CGP-NEXT: v_mul_hi_u32 v11, v17, v11
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v10
-; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc
-; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12]
-; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc
-; CGP-NEXT: v_xor_b32_e32 v12, v8, v14
-; CGP-NEXT: v_mul_lo_u32 v8, v16, v10
-; CGP-NEXT: v_mul_lo_u32 v15, v13, v11
-; CGP-NEXT: v_xor_b32_e32 v17, v9, v14
-; CGP-NEXT: v_mul_hi_u32 v9, v13, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v16, v10
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v10
+; CGP-NEXT: v_addc_u32_e32 v17, vcc, v17, v11, vcc
+; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_mov_b32_e32 v12, v11
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[12:13]
+; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12]
+; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
+; CGP-NEXT: v_xor_b32_e32 v12, v8, v13
+; CGP-NEXT: v_mul_lo_u32 v8, v17, v10
+; CGP-NEXT: v_mul_lo_u32 v15, v14, v11
+; CGP-NEXT: v_xor_b32_e32 v16, v9, v13
+; CGP-NEXT: v_mul_hi_u32 v9, v14, v10
+; CGP-NEXT: v_mul_hi_u32 v10, v17, v10
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v16, v11
+; CGP-NEXT: v_mul_lo_u32 v9, v17, v11
; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT: v_mul_hi_u32 v15, v13, v11
+; CGP-NEXT: v_mul_hi_u32 v15, v14, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15
-; CGP-NEXT: v_mul_hi_u32 v11, v16, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v17, v11
; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, v16, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v10, v17, v8
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; CGP-NEXT: v_addc_u32_e32 v9, vcc, v17, v9, vcc
+; CGP-NEXT: v_mul_lo_u32 v10, v16, v8
; CGP-NEXT: v_mul_lo_u32 v11, v12, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v12, v8
-; CGP-NEXT: v_mul_hi_u32 v8, v17, v8
-; CGP-NEXT: v_mul_hi_u32 v15, v17, v9
+; CGP-NEXT: v_mul_hi_u32 v14, v12, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v16, v8
+; CGP-NEXT: v_mul_hi_u32 v15, v16, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v13, v17, v9
+; CGP-NEXT: v_mul_lo_u32 v14, v16, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; CGP-NEXT: v_mul_hi_u32 v11, v12, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v8, v10
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v8, v10
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v14, 0
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v10
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10]
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v10
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
; CGP-NEXT: v_sub_i32_e32 v8, vcc, v12, v8
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10]
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v17, v9, vcc
-; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v17, v9
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v15, v[10:11]
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v14, v[9:10]
+; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v16, v9, vcc
+; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v16, v9
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1
; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4
; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1
; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v10, v12, v15, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v13
-; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v14
+; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v15, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v1
; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v4, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v12
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v11
+; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v12, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
-; CGP-NEXT: v_xor_b32_e32 v8, v14, v0
-; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc
+; CGP-NEXT: v_xor_b32_e32 v8, v13, v0
+; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v1, v8
; CGP-NEXT: v_xor_b32_e32 v1, v4, v8
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
@@ -2318,72 +2388,74 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
; CGP-NEXT: v_trunc_f32_e32 v10, v8
; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10
-; CGP-NEXT: v_cvt_u32_f32_e32 v11, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v14, v10
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT: v_mov_b32_e32 v6, v9
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7]
-; CGP-NEXT: v_mul_lo_u32 v6, v14, v8
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT: v_mul_hi_u32 v10, v11, v8
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v6, 0
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[10:11]
+; CGP-NEXT: v_mul_hi_u32 v11, v6, v8
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v6, v[9:10]
+; CGP-NEXT: v_mul_lo_u32 v10, v14, v8
; CGP-NEXT: v_mul_hi_u32 v8, v14, v8
-; CGP-NEXT: v_mul_lo_u32 v15, v11, v9
+; CGP-NEXT: v_mul_lo_u32 v15, v6, v9
; CGP-NEXT: v_mul_lo_u32 v16, v14, v9
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v15
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v6, v9
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10
; CGP-NEXT: v_add_i32_e32 v8, vcc, v16, v8
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT: v_mul_hi_u32 v9, v14, v9
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT: v_mul_hi_u32 v9, v14, v9
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v6
-; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v8, vcc
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT: v_mov_b32_e32 v6, v9
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7]
-; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v12, vcc
-; CGP-NEXT: v_xor_b32_e32 v10, v5, v12
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v9, vcc
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v6, 0
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[10:11]
+; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v7
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v6, v[9:10]
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CGP-NEXT: v_xor_b32_e32 v10, v5, v11
; CGP-NEXT: v_mul_lo_u32 v5, v14, v8
-; CGP-NEXT: v_mul_lo_u32 v7, v11, v9
-; CGP-NEXT: v_xor_b32_e32 v13, v6, v12
-; CGP-NEXT: v_mul_hi_u32 v6, v11, v8
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_xor_b32_e32 v13, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v8
; CGP-NEXT: v_mul_hi_u32 v8, v14, v8
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v6, v14, v9
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v7, v11, v9
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v7, v14, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT: v_mul_hi_u32 v9, v14, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT: v_mul_hi_u32 v8, v14, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v14, v6, vcc
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v14, v7, vcc
; CGP-NEXT: v_mul_lo_u32 v7, v13, v5
; CGP-NEXT: v_mul_lo_u32 v8, v10, v6
; CGP-NEXT: v_mul_hi_u32 v9, v10, v5
; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
-; CGP-NEXT: v_mul_hi_u32 v11, v13, v6
+; CGP-NEXT: v_mul_hi_u32 v12, v13, v6
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
@@ -2400,38 +2472,40 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, 0
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v7
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v8, v[6:7]
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v7
+; CGP-NEXT: ; implicit-def: $vgpr7_vgpr8
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT: v_mov_b32_e32 v7, v6
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v12, v[7:8]
; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[6:7]
; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v13, v6, vcc
; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3
; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3
; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v9
-; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v7, v8, v10, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3
; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10
-; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v11, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v8
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v5, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v5, v12, v2
-; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT: v_xor_b32_e32 v5, v11, v2
+; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v2, v3, v5
; CGP-NEXT: v_xor_b32_e32 v3, v4, v5
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
@@ -2545,8 +2619,10 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_mac_f32_e32 v3, 0xcf800000, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v7
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[7:8]
; GISEL-NEXT: v_mul_lo_u32 v4, v12, v3
; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
; GISEL-NEXT: v_mul_hi_u32 v8, v9, v3
@@ -2572,7 +2648,9 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v3
; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5]
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[7:8]
; GISEL-NEXT: v_mul_lo_u32 v4, v12, v3
; GISEL-NEXT: v_and_b32_e32 v10, 0xffffff, v0
; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
@@ -2614,122 +2692,127 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v0, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v3
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v4, v[7:8]
+; GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GISEL-NEXT: v_mov_b32_e32 v8, v7
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v4, v[8:9]
+; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v3
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v0, v[7:8]
; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v9
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v0, v[7:8]
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v10, v6
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v10, v6
+; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, v7, vcc
; GISEL-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
-; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v7, vcc
-; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v7
+; GISEL-NEXT: v_sub_i32_e64 v12, s[4:5], 0, v7
; GISEL-NEXT: v_trunc_f32_e32 v7, v5
; GISEL-NEXT: v_mac_f32_e32 v2, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v2
+; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v3
; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v2, 0
; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v7
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
-; GISEL-NEXT: v_mov_b32_e32 v2, v6
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v1
+; GISEL-NEXT: v_mov_b32_e32 v7, v6
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5]
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[2:3]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v16, s[4:5]
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v10, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v15, v5
-; GISEL-NEXT: v_mul_lo_u32 v10, v12, v6
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v8, v1
-; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v2, vcc
-; GISEL-NEXT: v_mul_hi_u32 v2, v12, v5
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[7:8]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v16, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v2, v[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v12, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, v15, v5
+; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT: v_mul_hi_u32 v16, v2, v5
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v9, v1
+; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v15, v6
+; GISEL-NEXT: v_mul_lo_u32 v16, v15, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2
-; GISEL-NEXT: v_mul_hi_u32 v10, v12, v6
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
+; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v2
-; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v15, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v4, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v2, v5
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v15, v6, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v10, 0
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1
+; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; GISEL-NEXT: v_mov_b32_e32 v1, v6
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v10, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v14, v7, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v8, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v12
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc
-; GISEL-NEXT: v_mul_lo_u32 v13, v10, v5
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v1
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v12, v[1:2]
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v9, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v14, v10, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v15
+; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v16, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v5
+; GISEL-NEXT: v_mul_lo_u32 v13, v10, v1
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc
-; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v12, v10, v1
-; GISEL-NEXT: v_mul_hi_u32 v5, v10, v5
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6
-; GISEL-NEXT: v_mul_hi_u32 v13, v7, v1
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13
-; GISEL-NEXT: v_mul_hi_u32 v1, v10, v1
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6
+; GISEL-NEXT: v_cndmask_b32_e32 v14, v15, v2, vcc
+; GISEL-NEXT: v_mul_hi_u32 v2, v10, v5
+; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v9, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v6, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v1
+; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5
+; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v9, v2
+; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
+; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v6, v5
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6
-; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v6
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5
-; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v10, v1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v6, 0, v5
-; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1
-; GISEL-NEXT: v_mul_hi_u32 v10, v11, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v8, vcc
-; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT: v_mul_lo_u32 v7, 0, v1
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v11, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v9
+; GISEL-NEXT: v_mul_hi_u32 v1, v12, v1
+; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v5, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v6, v5
+; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v5
+; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], v10, v2
+; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v12, v1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v5, 0, v2
+; GISEL-NEXT: v_mul_lo_u32 v6, v11, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v16, v7, vcc
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v2, 0, v2
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_mul_lo_u32 v6, 0, v1
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v2, v5
; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1
; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v1, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v1, v2
+; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; GISEL-NEXT: v_mov_b32_e32 v1, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[1:2]
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v5
; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], 0, v6
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], 0, v6, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
index 1441591a5fcce..0fba42ac9575b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
@@ -175,60 +175,64 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_trunc_f32_e32 v2, v1
; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX8-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v4, 0
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX8-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v4, 0
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v3, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1
+; GFX8-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v6, v3
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0
; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1
; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0
@@ -250,12 +254,14 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX8-NEXT: v_mov_b32_e32 v6, s11
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v5, v[2:3]
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s10, v0
; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
-; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_mov_b32_e32 v3, s9
; GFX8-NEXT: v_subb_u32_e64 v2, s[0:1], v6, v1, vcc
; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s11, v1
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2
@@ -263,19 +269,19 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2
-; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1]
; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v0
; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc
; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4
-; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v5, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8
; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7
-; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s8, v7
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v7
; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1]
; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9
; GFX8-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
@@ -284,19 +290,19 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v0, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1]
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13]
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v4
-; GFX8-NEXT: v_xor_b32_e32 v1, s1, v3
-; GFX8-NEXT: v_mov_b32_e32 v3, s1
+; GFX8-NEXT: v_xor_b32_e32 v1, s1, v5
+; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
-; GFX8-NEXT: v_xor_b32_e32 v3, s2, v5
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3
; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2
; GFX8-NEXT: v_mov_b32_e32 v5, s2
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3
@@ -335,59 +341,63 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_trunc_f32_e32 v2, v1
; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX9-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v2, v6, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v7, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v1, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_mov_b32_e32 v7, s7
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v3, v4, v1
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v6, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0
; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1
; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0
@@ -408,25 +418,27 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v5, 0
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s6, v3, v[1:2]
-; GFX9-NEXT: v_mov_b32_e32 v6, s9
+; GFX9-NEXT: v_add3_u32 v4, v3, v2, v6
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s8, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s6, v4, v[2:3]
+; GFX9-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s7, v5, v[1:2]
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc
+; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v3, v1, vcc
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v2
; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v0
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[0:1]
; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s6, v0
; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc
; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5
-; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
+; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v9
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v8
@@ -440,32 +452,33 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1]
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
+; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v10, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v7, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1]
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1]
; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
-; GFX9-NEXT: v_xor_b32_e32 v0, s0, v5
-; GFX9-NEXT: v_xor_b32_e32 v1, s1, v3
+; GFX9-NEXT: v_xor_b32_e32 v0, s0, v3
+; GFX9-NEXT: v_xor_b32_e32 v1, s1, v4
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: v_xor_b32_e32 v3, s2, v6
-; GFX9-NEXT: v_xor_b32_e32 v5, s2, v2
-; GFX9-NEXT: v_mov_b32_e32 v6, s2
+; GFX9-NEXT: v_xor_b32_e32 v3, s2, v5
+; GFX9-NEXT: v_xor_b32_e32 v4, s2, v2
+; GFX9-NEXT: v_mov_b32_e32 v5, s2
; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v6, vcc
-; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[12:13]
-; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[14:15]
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v5, vcc
+; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[12:13]
+; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[14:15]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: sdivrem_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_ashr_i32 s2, s17, 31
; GFX10-NEXT: s_ashr_i32 s4, s19, 31
@@ -485,62 +498,65 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX10-NEXT: v_trunc_f32_e32 v2, v1
-; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
-; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2
+; GFX10-NEXT: v_trunc_f32_e32 v4, v1
+; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v4
+; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v4
; GFX10-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s9, s8, v3, 0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, s8, v4, v[1:2]
-; GFX10-NEXT: s_subb_u32 s9, 0, s7
+; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s9, s8, v5, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, s8, v4, v[2:3]
+; GFX10-NEXT: s_subb_u32 s9, 0, s7
; GFX10-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s9, v3, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s9, v5, v[1:2]
; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0
-; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX10-NEXT: v_mul_lo_u32 v3, v5, v1
; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v8, v5, v1
; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10
+; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s10
; GFX10-NEXT: v_add_co_u32 v6, s10, v7, v6
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s10
; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10
; GFX10-NEXT: v_add_co_u32 v2, s10, v6, v8
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v7, v6
; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10
-; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0
-; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v5, v0
+; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v3, 0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s8, v4, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v5, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s9, v3, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s8, v4, v[2:3]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s9, v5, v[1:2]
; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0
-; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX10-NEXT: v_mul_lo_u32 v3, v5, v1
; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v8, v5, v1
; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT: v_add_co_u32 v2, s8, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s8
+; GFX10-NEXT: v_add_co_u32 v2, s8, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s8
; GFX10-NEXT: v_add_co_u32 v6, s8, v7, v6
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s8
; GFX10-NEXT: v_add_co_u32 v0, s8, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s8
; GFX10-NEXT: v_add_co_u32 v2, s8, v6, v8
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v7, v6
; GFX10-NEXT: v_add_co_u32 v0, s8, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8
-; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0
-; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v5, v0
+; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1
; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo
; GFX10-NEXT: v_mul_hi_u32 v4, s0, v0
@@ -557,16 +573,18 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2
; GFX10-NEXT: v_add_co_u32 v0, s8, v0, v3
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s8
+; GFX10-NEXT: v_mul_hi_u32 v6, s1, v1
; GFX10-NEXT: v_add_co_u32 v5, s8, v0, v2
-; GFX10-NEXT: v_mul_hi_u32 v2, s1, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s8
+; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v3
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s8, s6, v5, 0
-; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s6, v3, v[1:2]
+; GFX10-NEXT: v_add3_u32 v4, v4, v7, v6
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s6, v4, v[2:3]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s7, v5, v[1:2]
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1
-; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v6, s1, v1
; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo
@@ -583,7 +601,7 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v9
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0
; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1
-; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v3, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v9
; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v1
@@ -593,11 +611,11 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo
; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v7, v8, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v2, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v4, v3, s0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0
; GFX10-NEXT: v_mov_b32_e32 v4, 0
@@ -1311,63 +1329,67 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_trunc_f32_e32 v2, v1
; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX8-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v4, 0
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX8-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v4, 0
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v3, v4, v1
; GFX8-NEXT: s_xor_b64 s[16:17], s[4:5], s[6:7]
; GFX8-NEXT: s_ashr_i32 s6, s19, 31
; GFX8-NEXT: s_mov_b32 s7, s6
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1
+; GFX8-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v6, v3
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0
; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1
; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0
@@ -1389,26 +1411,28 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX8-NEXT: v_mov_b32_e32 v6, s11
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v5, v[2:3]
; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s10, v0
; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
-; GFX8-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NEXT: v_mov_b32_e32 v3, s9
; GFX8-NEXT: s_ashr_i32 s10, s3, 31
; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s11, v1
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7
-; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6
; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s8, v7
; GFX8-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[0:1]
; GFX8-NEXT: v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc
; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], 1, v4
-; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v5, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9
; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8
@@ -1424,15 +1448,15 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: s_addc_u32 s3, s3, s10
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11]
; GFX8-NEXT: v_cvt_f32_u32_e32 v14, s3
-; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
-; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s2
+; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
+; GFX8-NEXT: v_cvt_f32_u32_e32 v3, s2
; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, s8, v8
; GFX8-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v0, vcc
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14
-; GFX8-NEXT: v_add_f32_e32 v0, v0, v5
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v3
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v1, v12, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v12, vcc
; GFX8-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
@@ -1441,29 +1465,29 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_add_f32_e32 v0, v1, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v0
; GFX8-NEXT: s_sub_u32 s5, 0, s2
+; GFX8-NEXT: v_cvt_u32_f32_e32 v11, v11
; GFX8-NEXT: s_subb_u32 s20, 0, s3
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v12, 0
; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1]
-; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v11
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v3, v10, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v3, s[0:1]
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s5, v11, v[2:3]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v15, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1]
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s5, v5, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v3, v5, v0
; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v12, v[1:2]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v16, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1]
+; GFX8-NEXT: v_mul_lo_u32 v3, v11, v0
; GFX8-NEXT: v_mul_lo_u32 v8, v12, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1]
; GFX8-NEXT: v_mul_hi_u32 v2, v12, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
-; GFX8-NEXT: v_xor_b32_e32 v9, s17, v10
+; GFX8-NEXT: v_mul_hi_u32 v0, v11, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v3, v5, v1
+; GFX8-NEXT: v_mul_lo_u32 v3, v11, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2
; GFX8-NEXT: v_mul_hi_u32 v8, v12, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
@@ -1471,73 +1495,78 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8
-; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v11, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v12, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1]
; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v8, 0
-; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
-; GFX8-NEXT: v_xor_b32_e32 v1, s16, v4
+; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v11, v1, vcc
+; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-NEXT: v_xor_b32_e32 v10, s16, v4
; GFX8-NEXT: v_mov_b32_e32 v0, v3
-; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v5, v[0:1]
-; GFX8-NEXT: v_mov_b32_e32 v10, s17
-; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s16, v1
+; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v9, v[0:1]
+; GFX8-NEXT: v_xor_b32_e32 v5, s17, v5
+; GFX8-NEXT: v_mov_b32_e32 v11, s17
; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v8, v[3:4]
-; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v9, v10, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s16, v10
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v5, v11, vcc
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v7
-; GFX8-NEXT: v_mul_lo_u32 v7, v5, v2
-; GFX8-NEXT: v_mul_lo_u32 v9, v8, v3
+; GFX8-NEXT: v_xor_b32_e32 v5, s4, v6
+; GFX8-NEXT: v_mul_lo_u32 v6, v9, v2
+; GFX8-NEXT: v_mul_lo_u32 v7, v8, v3
; GFX8-NEXT: v_mul_hi_u32 v11, v8, v2
-; GFX8-NEXT: v_mul_hi_u32 v2, v5, v2
-; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9
-; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11
+; GFX8-NEXT: v_mul_hi_u32 v2, v9, v2
+; GFX8-NEXT: v_mov_b32_e32 v10, s4
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v11, v5, v3
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT: v_mul_hi_u32 v9, v8, v3
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11
+; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GFX8-NEXT: v_mul_lo_u32 v11, v9, v3
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6
+; GFX8-NEXT: v_mul_hi_u32 v7, v8, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v11, v2
; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9
-; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9
-; GFX8-NEXT: v_mul_hi_u32 v3, v5, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
+; GFX8-NEXT: v_add_u32_e32 v7, vcc, v11, v7
+; GFX8-NEXT: v_mul_hi_u32 v3, v9, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
+; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2
-; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX8-NEXT: v_mov_b32_e32 v10, s4
-; GFX8-NEXT: v_mul_lo_u32 v7, s9, v2
-; GFX8-NEXT: v_mul_lo_u32 v8, s8, v3
+; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v9, v3, vcc
+; GFX8-NEXT: v_mul_lo_u32 v6, s9, v2
+; GFX8-NEXT: v_mul_lo_u32 v7, s8, v3
+; GFX8-NEXT: v_mul_hi_u32 v8, s8, v2
; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4
-; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v10, vcc
-; GFX8-NEXT: v_mul_hi_u32 v6, s8, v2
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6
+; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v5, v10, vcc
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v7, s9, v3
+; GFX8-NEXT: v_mul_lo_u32 v8, s9, v3
; GFX8-NEXT: v_mul_hi_u32 v2, s9, v2
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6
-; GFX8-NEXT: v_mul_hi_u32 v8, s8, v3
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v8
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6
+; GFX8-NEXT: v_mul_hi_u32 v7, s8, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6
; GFX8-NEXT: v_mul_hi_u32 v9, s9, v3
; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v8, 0
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6
; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6
-; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[3:4]
+; GFX8-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX8-NEXT: v_mov_b32_e32 v10, s9
+; GFX8-NEXT: v_mov_b32_e32 v6, v3
+; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[6:7]
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v2
; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v8, v[6:7]
; GFX8-NEXT: v_mov_b32_e32 v3, s3
@@ -1622,66 +1651,70 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_trunc_f32_e32 v2, v1
; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX9-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v2, v6, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v7, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v1, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v4, 0
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v3, v4, v1
; GFX9-NEXT: s_xor_b64 s[16:17], s[4:5], s[6:7]
; GFX9-NEXT: s_ashr_i32 s6, s19, 31
; GFX9-NEXT: s_mov_b32 s7, s6
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v6, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0
; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1
; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0
; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0
-; GFX9-NEXT: v_mul_hi_u32 v6, s11, v1
+; GFX9-NEXT: v_mul_hi_u32 v5, s11, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
@@ -1693,37 +1726,39 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
+; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v0, v2
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v6, 0
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
-; GFX9-NEXT: v_mov_b32_e32 v6, s11
-; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
+; GFX9-NEXT: v_add3_u32 v7, v3, v2, v5
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mov_b32_e32 v5, s11
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v7, v[2:3]
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s9, v6, v[1:2]
; GFX9-NEXT: v_mov_b32_e32 v4, s9
; GFX9-NEXT: s_ashr_i32 s10, s3, 31
-; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
-; GFX9-NEXT: v_sub_u32_e32 v0, s11, v1
-; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7
-; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6
-; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s8, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v2, s[0:1]
-; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v0, vcc
-; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], 1, v5
-; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
+; GFX9-NEXT: v_subb_co_u32_e64 v1, s[0:1], v5, v2, vcc
+; GFX9-NEXT: v_sub_u32_e32 v2, s11, v2
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
+; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v1
+; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s8, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, v5, s[0:1]
+; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v2, vcc
+; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], 1, v6
+; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v7, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v10
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v9
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v10
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[0:1]
-; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v12, v3, v12, s[0:1]
+; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v5
; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1]
; GFX9-NEXT: s_add_u32 s0, s18, s6
; GFX9-NEXT: s_addc_u32 s1, s19, s6
@@ -1731,118 +1766,123 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: s_mov_b32 s11, s10
; GFX9-NEXT: s_addc_u32 s3, s3, s10
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11]
-; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3
+; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s3
; GFX9-NEXT: v_cvt_f32_u32_e32 v15, s2
-; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc
-; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
-; GFX9-NEXT: v_add_f32_e32 v1, v1, v15
-; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s8, v9
-; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
-; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT: v_trunc_f32_e32 v16, v1
-; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v16
-; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v0
+; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
+; GFX9-NEXT: v_add_f32_e32 v3, v3, v15
+; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; GFX9-NEXT: v_subrev_co_u32_e32 v15, vcc, s8, v9
+; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v2, vcc
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v3
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
+; GFX9-NEXT: v_trunc_f32_e32 v4, v3
+; GFX9-NEXT: v_mul_f32_e32 v3, 0xcf800000, v4
+; GFX9-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v2
; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
; GFX9-NEXT: s_sub_u32 s5, 0, s2
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v17, 0
-; GFX9-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc
-; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v16
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v17, 0
+; GFX9-NEXT: v_cndmask_b32_e32 v12, v5, v13, vcc
+; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v4
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_subb_u32 s20, 0, s3
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v13, v[1:2]
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v13, v[4:5]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8
-; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, v11, s[0:1]
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v17, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v13, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc
-; GFX9-NEXT: v_mul_lo_u32 v3, v17, v1
-; GFX9-NEXT: v_mul_hi_u32 v10, v17, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v13, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1]
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v10
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v10, v13, v1
-; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT: v_mul_hi_u32 v3, v17, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v10, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v6, v12, s[0:1]
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[18:19], s20, v17, v[3:4]
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v11, s[0:1]
+; GFX9-NEXT: v_mul_lo_u32 v7, v13, v2
+; GFX9-NEXT: v_mul_lo_u32 v8, v17, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v9, v15, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v16, vcc
+; GFX9-NEXT: v_mul_hi_u32 v10, v17, v2
+; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
+; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10
+; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX9-NEXT: v_mul_lo_u32 v10, v13, v3
+; GFX9-NEXT: v_mul_hi_u32 v2, v13, v2
+; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
+; GFX9-NEXT: v_mul_hi_u32 v8, v17, v3
+; GFX9-NEXT: v_mul_hi_u32 v3, v13, v3
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v3, v10, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v17, v0
-; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v10, 0
-; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v13, v1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v8
+; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
+; GFX9-NEXT: v_add_u32_e32 v8, v10, v8
+; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX9-NEXT: v_add3_u32 v3, v8, v7, v3
+; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v17, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v13, v3, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v7, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v1, v9, s[0:1]
+; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-NEXT: v_xor_b32_e32 v10, s17, v4
; GFX9-NEXT: v_mov_b32_e32 v0, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v4, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1]
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v11, v[0:1]
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v8, v[0:1]
; GFX9-NEXT: v_xor_b32_e32 v5, s16, v5
-; GFX9-NEXT: v_xor_b32_e32 v8, s17, v8
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v10, v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v9, s17
+; GFX9-NEXT: v_mov_b32_e32 v11, s17
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v7, v[0:1]
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s16, v5
-; GFX9-NEXT: v_xor_b32_e32 v4, s4, v7
-; GFX9-NEXT: v_mul_lo_u32 v5, v11, v2
-; GFX9-NEXT: v_mul_lo_u32 v7, v10, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v9, vcc
-; GFX9-NEXT: v_mul_hi_u32 v8, v10, v2
-; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
+; GFX9-NEXT: v_xor_b32_e32 v4, s4, v6
+; GFX9-NEXT: v_mul_lo_u32 v5, v8, v2
+; GFX9-NEXT: v_mul_lo_u32 v6, v7, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v10, v11, vcc
+; GFX9-NEXT: v_mul_hi_u32 v10, v7, v2
+; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v10
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3
-; GFX9-NEXT: v_mul_hi_u32 v2, v11, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT: v_mul_hi_u32 v7, v10, v3
-; GFX9-NEXT: v_mul_hi_u32 v3, v11, v3
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX9-NEXT: v_mul_lo_u32 v10, v8, v3
+; GFX9-NEXT: v_mul_hi_u32 v2, v8, v2
+; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_mul_hi_u32 v6, v7, v3
+; GFX9-NEXT: v_mul_hi_u32 v3, v8, v3
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
+; GFX9-NEXT: v_add_u32_e32 v6, v10, v6
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v3, v7, v5, v3
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc
+; GFX9-NEXT: v_add3_u32 v3, v6, v5, v3
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v3, vcc
; GFX9-NEXT: v_mul_lo_u32 v5, s9, v2
-; GFX9-NEXT: v_mul_lo_u32 v7, s8, v3
-; GFX9-NEXT: v_mul_hi_u32 v9, s8, v2
+; GFX9-NEXT: v_mul_lo_u32 v6, s8, v3
+; GFX9-NEXT: v_mul_hi_u32 v8, s8, v2
; GFX9-NEXT: v_mul_hi_u32 v2, s9, v2
; GFX9-NEXT: v_mul_hi_u32 v12, s9, v3
-; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9
+; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v9, s9, v3
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT: v_mul_hi_u32 v7, s8, v3
-; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX9-NEXT: v_mul_lo_u32 v8, s9, v3
+; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_mul_hi_u32 v6, s8, v3
+; GFX9-NEXT: v_xor_b32_e32 v9, s4, v9
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v2, v5
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v10, 0
-; GFX9-NEXT: v_mov_b32_e32 v8, s4
+; GFX9-NEXT: v_mov_b32_e32 v7, s4
; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s4, v4
-; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v8, vcc
-; GFX9-NEXT: v_add_u32_e32 v6, v9, v7
+; GFX9-NEXT: v_add_u32_e32 v6, v8, v6
+; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v9, v7, vcc
; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12
-; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v8, v[3:4]
+; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT: v_mov_b32_e32 v9, s9
+; GFX9-NEXT: v_mov_b32_e32 v6, v3
+; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v8, v[6:7]
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v2
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v10, v[6:7]
; GFX9-NEXT: v_mov_b32_e32 v3, s3
@@ -1937,259 +1977,268 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
-; GFX10-NEXT: v_trunc_f32_e32 v2, v2
-; GFX10-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v1
-; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
-; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
-; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v2
+; GFX10-NEXT: v_trunc_f32_e32 v4, v2
+; GFX10-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v1
+; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v4
+; GFX10-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
+; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX10-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT: v_trunc_f32_e32 v6, v4
+; GFX10-NEXT: v_trunc_f32_e32 v6, v3
; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0
-; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v6
+; GFX10-NEXT: v_mul_f32_e32 v3, 0xcf800000, v6
+; GFX10-NEXT: v_cvt_u32_f32_e32 v10, v6
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s21, v7, 0
-; GFX10-NEXT: v_add_f32_e32 v3, v4, v3
+; GFX10-NEXT: v_add_f32_e32 v5, v3, v2
; GFX10-NEXT: s_sub_u32 s5, 0, s2
-; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v3
-; GFX10-NEXT: v_mul_hi_u32 v10, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s22, s5, v8, 0
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s22, s21, v9, v[1:2]
-; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v6
-; GFX10-NEXT: v_mov_b32_e32 v1, v3
-; GFX10-NEXT: v_mul_hi_u32 v6, v7, v0
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v5
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
+; GFX10-NEXT: v_mul_hi_u32 v11, v7, v0
+; GFX10-NEXT: v_mul_hi_u32 v12, v9, v0
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], s22, s5, v8, 0
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s22, s21, v9, v[2:3]
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: s_subb_u32 s22, 0, s3
-; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2
-; GFX10-NEXT: v_mul_lo_u32 v11, v5, v2
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s23, s20, v7, v[4:5]
-; GFX10-NEXT: v_mul_lo_u32 v4, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s5, v5, v[1:2]
-; GFX10-NEXT: v_mul_hi_u32 v2, v5, v2
-; GFX10-NEXT: v_mul_lo_u32 v13, v7, v3
-; GFX10-NEXT: v_mul_lo_u32 v14, v9, v3
-; GFX10-NEXT: v_mul_hi_u32 v15, v7, v3
+; GFX10-NEXT: v_mov_b32_e32 v2, v5
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s23, s20, v7, v[1:2]
+; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s5, v10, v[2:3]
+; GFX10-NEXT: v_mul_lo_u32 v2, v10, v4
+; GFX10-NEXT: v_mul_hi_u32 v3, v8, v4
+; GFX10-NEXT: v_mul_hi_u32 v4, v10, v4
+; GFX10-NEXT: v_mul_lo_u32 v13, v7, v5
+; GFX10-NEXT: v_mul_lo_u32 v14, v9, v5
+; GFX10-NEXT: v_mul_hi_u32 v15, v7, v5
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s22, v8, v[0:1]
-; GFX10-NEXT: v_mul_hi_u32 v1, v9, v3
-; GFX10-NEXT: v_add_co_u32 v3, s23, v4, v13
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v10, s23, v14, v10
+; GFX10-NEXT: v_mul_hi_u32 v1, v9, v5
+; GFX10-NEXT: v_add_co_u32 v5, s23, v6, v13
+; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s23
+; GFX10-NEXT: v_add_co_u32 v12, s23, v14, v12
; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s23
; GFX10-NEXT: v_mul_lo_u32 v14, v8, v0
-; GFX10-NEXT: v_add_co_u32 v3, s23, v3, v6
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v6, s23, v10, v15
-; GFX10-NEXT: v_mul_lo_u32 v15, v5, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23
+; GFX10-NEXT: v_add_co_u32 v5, s23, v5, v11
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s23
+; GFX10-NEXT: v_add_co_u32 v11, s23, v12, v15
+; GFX10-NEXT: v_mul_lo_u32 v15, v10, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s23
+; GFX10-NEXT: v_add_nc_u32_e32 v5, v6, v5
+; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v14
; GFX10-NEXT: v_mul_hi_u32 v16, v8, v0
-; GFX10-NEXT: v_mul_hi_u32 v17, v5, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v4, v3
-; GFX10-NEXT: v_add_co_u32 v4, s23, v11, v14
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v13, v10
-; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v2, s23, v15, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v0, s23, v6, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v4, s23, v4, v12
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23
-; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v16
-; GFX10-NEXT: v_add3_u32 v1, v3, v6, v1
-; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v7, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v10, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v6, v13, v12
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s23
-; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v9, v1, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s21, v6, 0
+; GFX10-NEXT: v_add_co_u32 v4, s23, v15, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s23
+; GFX10-NEXT: v_add_co_u32 v5, s23, v11, v5
+; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s23
; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v12
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23
-; GFX10-NEXT: v_mov_b32_e32 v10, 0
-; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2
-; GFX10-NEXT: v_mul_hi_u32 v11, v7, v0
-; GFX10-NEXT: v_add3_u32 v3, v4, v3, v17
-; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v3, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s23, s5, v8, 0
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s21, s21, v7, v[1:2]
-; GFX10-NEXT: v_mov_b32_e32 v1, v3
-; GFX10-NEXT: v_mul_lo_u32 v12, v9, v2
-; GFX10-NEXT: v_mul_hi_u32 v13, v8, v2
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s20, s20, v6, v[4:5]
-; GFX10-NEXT: v_mul_lo_u32 v4, v7, v0
-; GFX10-NEXT: v_mul_hi_u32 v5, v6, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s5, v9, v[1:2]
-; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2
-; GFX10-NEXT: v_mul_lo_u32 v14, v6, v3
-; GFX10-NEXT: v_mul_lo_u32 v15, v7, v3
-; GFX10-NEXT: v_mul_hi_u32 v16, v6, v3
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s22, v8, v[0:1]
-; GFX10-NEXT: v_mul_hi_u32 v1, v7, v3
-; GFX10-NEXT: v_add_co_u32 v3, s5, v4, v14
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v11, s5, v15, v11
-; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v5
-; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s23
+; GFX10-NEXT: v_add_co_u32 v3, s23, v4, v16
+; GFX10-NEXT: v_add3_u32 v1, v6, v11, v1
+; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v7, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v5, v12, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23
+; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v9, v1, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s23, s21, v11, 0
+; GFX10-NEXT: v_add_co_u32 v5, s23, v3, v5
+; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v6, v13, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s23
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_add_co_u32 v13, vcc_lo, v8, v5
+; GFX10-NEXT: v_mov_b32_e32 v3, v2
+; GFX10-NEXT: v_add3_u32 v0, v6, v7, v0
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s23, s5, v13, 0
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s21, s21, v12, v[3:4]
+; GFX10-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, v10, v0, vcc_lo
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: v_mul_hi_u32 v9, v11, v1
+; GFX10-NEXT: v_mov_b32_e32 v7, v6
+; GFX10-NEXT: v_mul_hi_u32 v16, v14, v5
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s20, s20, v11, v[2:3]
+; GFX10-NEXT: v_mul_lo_u32 v3, v12, v1
+; GFX10-NEXT: v_mad_u64_u32 v[6:7], s5, s5, v14, v[7:8]
+; GFX10-NEXT: v_mul_lo_u32 v7, v14, v5
+; GFX10-NEXT: v_mul_hi_u32 v1, v12, v1
+; GFX10-NEXT: v_mul_hi_u32 v8, v13, v5
+; GFX10-NEXT: v_mul_lo_u32 v15, v11, v2
+; GFX10-NEXT: v_mul_lo_u32 v17, v12, v2
+; GFX10-NEXT: v_mul_hi_u32 v18, v11, v2
+; GFX10-NEXT: v_mul_hi_u32 v2, v12, v2
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s5, s22, v13, v[6:7]
+; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v15
+; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v1, s5, v17, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s5
+; GFX10-NEXT: v_mul_lo_u32 v17, v13, v5
+; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v9
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v18
+; GFX10-NEXT: v_mul_lo_u32 v18, v14, v5
+; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v6, v3
+; GFX10-NEXT: v_add_co_u32 v7, s5, v7, v17
+; GFX10-NEXT: v_mul_hi_u32 v19, v13, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v6, v15, v9
+; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v15, s5, v18, v16
+; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v3
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v5, s5, v11, v16
-; GFX10-NEXT: v_mul_lo_u32 v16, v9, v0
+; GFX10-NEXT: v_add_co_u32 v7, s5, v7, v8
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v8, s5, v15, v19
+; GFX10-NEXT: v_add3_u32 v2, v6, v3, v2
+; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v11, v1
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v9, v7
+; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s5
+; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v12, v2, vcc_lo
+; GFX10-NEXT: v_mul_hi_u32 v5, v14, v5
+; GFX10-NEXT: v_add_co_u32 v3, s5, v8, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v6, v16, v15
+; GFX10-NEXT: v_mul_lo_u32 v7, s1, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s5
+; GFX10-NEXT: v_mul_lo_u32 v11, s0, v2
+; GFX10-NEXT: v_mul_hi_u32 v9, s0, v1
+; GFX10-NEXT: v_mul_hi_u32 v1, s1, v1
+; GFX10-NEXT: v_mul_lo_u32 v12, s1, v2
+; GFX10-NEXT: v_add3_u32 v5, v6, v8, v5
+; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v13, v3
+; GFX10-NEXT: v_mul_hi_u32 v6, s0, v2
+; GFX10-NEXT: v_add_co_u32 v7, s5, v7, v11
+; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v14, v5, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3
-; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0
-; GFX10-NEXT: v_mul_hi_u32 v0, v9, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v14, v11
-; GFX10-NEXT: v_add_co_u32 v11, s5, v12, v15
+; GFX10-NEXT: v_add_co_u32 v1, s5, v12, v1
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v2, s5, v16, v2
+; GFX10-NEXT: v_add_co_u32 v7, s5, v7, v9
+; GFX10-NEXT: v_mul_hi_u32 v8, s1, v2
+; GFX10-NEXT: v_mul_lo_u32 v2, s19, v3
+; GFX10-NEXT: v_mul_lo_u32 v14, s18, v5
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5
+; GFX10-NEXT: v_mul_hi_u32 v13, s18, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v7, v11, v7
+; GFX10-NEXT: v_mul_hi_u32 v3, s19, v3
+; GFX10-NEXT: v_add_co_u32 v11, s5, v2, v14
+; GFX10-NEXT: v_mul_lo_u32 v9, s19, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v6, v12, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s5
+; GFX10-NEXT: v_add_co_u32 v7, s5, v1, v7
+; GFX10-NEXT: v_mul_hi_u32 v15, s18, v5
; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v3, s5, v5, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5
+; GFX10-NEXT: v_mul_hi_u32 v5, s19, v5
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s6, v7, 0
+; GFX10-NEXT: v_add_co_u32 v3, s20, v9, v3
; GFX10-NEXT: v_add_co_u32 v11, s5, v11, v13
; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v17
-; GFX10-NEXT: v_add3_u32 v1, v4, v5, v1
-; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v6, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v11
-; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s5
-; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v1, vcc_lo
-; GFX10-NEXT: v_mul_lo_u32 v6, s1, v3
-; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v14, v13
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT: v_mul_lo_u32 v11, s0, v1
-; GFX10-NEXT: v_mul_hi_u32 v7, s0, v3
-; GFX10-NEXT: v_mul_hi_u32 v3, s1, v3
-; GFX10-NEXT: v_mul_lo_u32 v12, s1, v1
-; GFX10-NEXT: v_add3_u32 v0, v5, v4, v0
-; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2
-; GFX10-NEXT: v_mul_hi_u32 v4, s0, v1
-; GFX10-NEXT: v_mul_hi_u32 v5, s1, v1
-; GFX10-NEXT: v_add_co_u32 v1, s5, v6, v11
-; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v9, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v3, s5, v12, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v7
-; GFX10-NEXT: v_mul_lo_u32 v0, s19, v2
-; GFX10-NEXT: v_mul_lo_u32 v12, s18, v8
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v4
-; GFX10-NEXT: v_mul_hi_u32 v9, s18, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT: v_mul_hi_u32 v2, s19, v2
-; GFX10-NEXT: v_mul_lo_u32 v7, s19, v8
-; GFX10-NEXT: v_add_nc_u32_e32 v1, v6, v1
-; GFX10-NEXT: v_add_co_u32 v6, s5, v0, v12
-; GFX10-NEXT: v_mul_hi_u32 v13, s18, v8
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v4
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v12, s5, v3, v1
-; GFX10-NEXT: v_add_co_u32 v2, s20, v7, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s6, v12, 0
-; GFX10-NEXT: v_add_co_u32 v6, s5, v6, v9
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5
-; GFX10-NEXT: v_add_co_u32 v9, s5, v2, v13
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s20
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5
-; GFX10-NEXT: v_add3_u32 v4, v4, v7, v5
-; GFX10-NEXT: v_add_nc_u32_e32 v6, v11, v6
-; GFX10-NEXT: v_mul_hi_u32 v5, s19, v8
-; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v2
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s6, v4, v[1:2]
-; GFX10-NEXT: v_add_co_u32 v6, s5, v9, v6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5
-; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v4, vcc_lo
+; GFX10-NEXT: v_add_co_u32 v13, s5, v3, v15
+; GFX10-NEXT: v_add3_u32 v6, v6, v14, v8
+; GFX10-NEXT: v_mov_b32_e32 v3, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s5
+; GFX10-NEXT: v_add_nc_u32_e32 v8, v12, v11
+; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s20
; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v7, 1
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s7, v12, v[1:2]
-; GFX10-NEXT: v_add3_u32 v5, v3, v9, v5
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s2, v6, 0
-; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v8, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s0, v0
-; GFX10-NEXT: v_sub_nc_u32_e32 v9, s1, v1
-; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s0, s1, v1, vcc_lo
-; GFX10-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v14, s6
-; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v14
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v16
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s6, v6, v[3:4]
+; GFX10-NEXT: v_add_co_u32 v8, s5, v13, v8
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5
+; GFX10-NEXT: v_add_nc_u32_e32 v9, v9, v15
+; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s7, v7, v[2:3]
+; GFX10-NEXT: v_add_co_u32 v13, vcc_lo, v11, 1
+; GFX10-NEXT: v_add3_u32 v5, v9, v4, v5
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s5, s2, v8, 0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo
+; GFX10-NEXT: v_sub_nc_u32_e32 v9, s1, v2
+; GFX10-NEXT: v_sub_co_u32 v15, vcc_lo, s0, v1
+; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s0, s1, v2, vcc_lo
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v17, vcc_lo, s7, v9, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v9, v4
+; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v15, s6
+; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s0, 0, v17, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v15
; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v15
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v4
; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1]
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v18
+; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, -1, s0
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v16
+; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, -1, s0
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s2, v5, v[9:10]
+; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v18
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, s7, v17, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v21, v20, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v16
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v19, v18, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v15
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v17, v20, v17, s0
-; GFX10-NEXT: v_sub_co_u32 v1, s0, v3, s6
-; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s3, v6, v[0:1]
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v7, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s18, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v8, s0
-; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s1, s19, v0, vcc_lo
-; GFX10-NEXT: v_sub_nc_u32_e32 v0, s19, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v14, v3, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v8
-; GFX10-NEXT: v_xor_b32_e32 v1, s16, v1
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2
-; GFX10-NEXT: v_xor_b32_e32 v4, s17, v4
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0
-; GFX10-NEXT: v_xor_b32_e32 v3, s4, v3
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v22, v19, s0
+; GFX10-NEXT: v_sub_co_u32 v2, s0, v4, s6
+; GFX10-NEXT: v_subrev_co_ci_u32_e64 v10, s0, 0, v10, s0
+; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v13, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s3, v8, v[1:2]
+; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v9
+; GFX10-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v11, s0
+; GFX10-NEXT: v_cndmask_b32_e32 v7, v18, v10, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, s18, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v9, s0
+; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s1, s19, v1, vcc_lo
+; GFX10-NEXT: v_sub_nc_u32_e32 v1, s19, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v15, v4, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v7, s0
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v9
+; GFX10-NEXT: v_xor_b32_e32 v2, s16, v2
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v1, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v3
+; GFX10-NEXT: v_xor_b32_e32 v6, s17, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0
+; GFX10-NEXT: v_xor_b32_e32 v4, s4, v4
; GFX10-NEXT: v_xor_b32_e32 v7, s4, v7
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v2, s2
+; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v3, s2
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s0, 0, v11, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v0, s0, v1, s16
-; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s17, v4, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v8
+; GFX10-NEXT: v_sub_co_u32 v1, s0, v2, s16
+; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s0, s17, v6, s0
+; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v9
; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v9, v12, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v12, s0
; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v14
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0
; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v13
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0
-; GFX10-NEXT: v_add_co_u32 v15, s0, v6, 1
+; GFX10-NEXT: v_add_co_u32 v15, s0, v8, 1
; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v5, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v14
-; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v12, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v12, s0
; GFX10-NEXT: v_add_co_u32 v12, s0, v15, 1
; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
-; GFX10-NEXT: v_sub_co_u32 v9, s0, v13, s2
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10
+; GFX10-NEXT: v_sub_co_u32 v10, s0, v13, s2
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v11, s0
; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v12, vcc_lo
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v4
+; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v6
; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v13, v9, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v11, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v12, s0
+; GFX10-NEXT: v_cndmask_b32_e32 v6, v13, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v10, v14, v11, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
; GFX10-NEXT: v_cndmask_b32_e64 v11, v5, v15, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v9, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v6, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v10, s0
; GFX10-NEXT: s_xor_b64 s[0:1], s[8:9], s[10:11]
-; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v3, s4
-; GFX10-NEXT: v_xor_b32_e32 v3, s0, v6
-; GFX10-NEXT: v_xor_b32_e32 v6, s1, v11
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v7, vcc_lo
-; GFX10-NEXT: v_xor_b32_e32 v7, s8, v2
-; GFX10-NEXT: v_xor_b32_e32 v8, s8, v8
-; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s0
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v7, s8
-; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s8, v8, vcc_lo
-; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[12:13]
-; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[14:15]
+; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, v4, s4
+; GFX10-NEXT: v_xor_b32_e32 v4, s0, v8
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s4, v7, vcc_lo
+; GFX10-NEXT: v_xor_b32_e32 v7, s1, v11
+; GFX10-NEXT: v_xor_b32_e32 v8, s8, v3
+; GFX10-NEXT: v_xor_b32_e32 v9, s8, v9
+; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v4, s0
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v4, vcc_lo, s1, v7, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, v8, s8
+; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s8, v9, vcc_lo
+; GFX10-NEXT: global_store_dwordx4 v0, v[1:4], s[12:13]
+; GFX10-NEXT: global_store_dwordx4 v0, v[5:8], s[14:15]
; GFX10-NEXT: s_endpgm
%div = sdiv <2 x i64> %x, %y
store <2 x i64> %div, ptr addrspace(1) %out0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 19dc20c510041..e77a514a06857 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -41,8 +41,10 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v6
; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v6
+; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
+; CHECK-NEXT: v_mov_b32_e32 v6, v3
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2
; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
; CHECK-NEXT: v_mul_hi_u32 v7, v8, v2
@@ -68,61 +70,65 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2
; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v3, vcc
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
+; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
+; CHECK-NEXT: v_mov_b32_e32 v6, v3
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v5
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v9
; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v9, vcc
-; CHECK-NEXT: v_xor_b32_e32 v5, v3, v9
+; CHECK-NEXT: v_xor_b32_e32 v7, v3, v9
; CHECK-NEXT: v_mul_lo_u32 v3, v11, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, v8, v6
+; CHECK-NEXT: v_mul_lo_u32 v5, v8, v6
; CHECK-NEXT: v_xor_b32_e32 v10, v4, v9
; CHECK-NEXT: v_mul_hi_u32 v4, v8, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v11, v2
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT: v_mul_hi_u32 v5, v8, v6
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_mul_hi_u32 v5, v11, v6
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v2
+; CHECK-NEXT: v_mul_lo_u32 v5, v7, v3
+; CHECK-NEXT: v_mul_hi_u32 v6, v7, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2
; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v7, v10, v3
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v3
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT: v_mul_lo_u32 v6, v10, v3
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_mul_hi_u32 v5, v7, v3
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v2, v4
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v7, 0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v2, v4
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
-; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v7, v[3:4]
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v4
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v8, v[4:5]
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4]
; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1
@@ -214,60 +220,64 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v0
-; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v2
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
-; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
-; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT: v_mul_lo_u32 v6, v3, v1
-; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1
-; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1
-; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
+; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v0
+; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v4, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v5, v[2:3]
+; CHECK-NEXT: v_mul_hi_u32 v3, v4, v0
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v4, v[1:2]
+; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
+; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1
+; CHECK-NEXT: v_mul_lo_u32 v7, v5, v1
+; CHECK-NEXT: v_mul_hi_u32 v8, v4, v1
+; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0
-; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
-; CHECK-NEXT: v_mul_hi_u32 v6, v3, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
-; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
-; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v0
+; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v4, 0
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v5, v[2:3]
+; CHECK-NEXT: v_mul_hi_u32 v6, v4, v0
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v4, v[1:2]
+; CHECK-NEXT: v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
+; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1
+; CHECK-NEXT: v_mul_lo_u32 v6, v5, v1
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, s11, v0
; CHECK-NEXT: v_mul_lo_u32 v3, s10, v1
; CHECK-NEXT: v_mul_hi_u32 v4, s10, v0
@@ -289,12 +299,14 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
-; CHECK-NEXT: v_mov_b32_e32 v5, s11
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s10, v0
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v5, v[2:3]
+; CHECK-NEXT: v_mov_b32_e32 v5, s11
; CHECK-NEXT: v_mov_b32_e32 v3, s9
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s11, v1
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
@@ -380,47 +392,49 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v4
; GISEL-NEXT: v_trunc_f32_e32 v11, v9
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v11
-; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v11
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v10
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v15, v9
-; GISEL-NEXT: v_mul_hi_u32 v16, v12, v9
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11]
+; GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0
+; GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[11:12]
+; GISEL-NEXT: v_mul_hi_u32 v16, v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v4, v[10:11]
+; GISEL-NEXT: v_mul_lo_u32 v11, v15, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, v12, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v11
+; GISEL-NEXT: v_mul_lo_u32 v12, v4, v10
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v16, v15, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT: v_mul_hi_u32 v11, v12, v10
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v12, v4, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
+; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v4
-; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v9, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v10
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5]
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v4, v9
+; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v15, v10, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v16, 0
+; GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11]
+; GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[11:12]
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v13, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[10:11]
+; GISEL-NEXT: v_xor_b32_e32 v12, v0, v4
; GISEL-NEXT: v_mul_lo_u32 v0, v15, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, v12, v10
-; GISEL-NEXT: v_xor_b32_e32 v14, v1, v4
-; GISEL-NEXT: v_mul_hi_u32 v1, v12, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v16, v10
+; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT: v_mul_hi_u32 v1, v16, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v15, v9
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
@@ -428,7 +442,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v1, v15, v10
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v11, v12, v10
+; GISEL-NEXT: v_mul_hi_u32 v11, v16, v10
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
@@ -439,194 +453,200 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v15, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v9, v14, v0
-; GISEL-NEXT: v_mul_lo_u32 v10, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_mul_lo_u32 v9, v13, v0
+; GISEL-NEXT: v_mul_lo_u32 v10, v12, v1
+; GISEL-NEXT: v_mul_hi_u32 v11, v12, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v14, v1
+; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_mul_hi_u32 v10, v13, v1
+; GISEL-NEXT: v_mul_hi_u32 v10, v12, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_mul_hi_u32 v1, v14, v1
+; GISEL-NEXT: v_mul_hi_u32 v1, v13, v1
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v9
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v1, v0
-; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v7
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc
-; GISEL-NEXT: v_xor_b32_e32 v6, v6, v10
-; GISEL-NEXT: v_xor_b32_e32 v7, v7, v10
-; GISEL-NEXT: v_cvt_f32_u32_e32 v12, v6
-; GISEL-NEXT: v_cvt_f32_u32_e32 v15, v7
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v1, v0
+; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v7
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v6, v6, v9
+; GISEL-NEXT: v_xor_b32_e32 v7, v7, v9
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 0, v6
-; GISEL-NEXT: v_mac_f32_e32 v12, 0x4f800000, v15
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v9, v[1:2]
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v12
-; GISEL-NEXT: v_subb_u32_e32 v17, vcc, 0, v7, vcc
+; GISEL-NEXT: v_cvt_f32_u32_e32 v15, v6
+; GISEL-NEXT: v_cvt_f32_u32_e32 v16, v7
+; GISEL-NEXT: v_mov_b32_e32 v9, v1
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v14, v[9:10]
+; GISEL-NEXT: v_mac_f32_e32 v15, 0x4f800000, v16
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v15
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v8, v11, v[9:10]
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 0, v6
; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v1
-; GISEL-NEXT: v_trunc_f32_e32 v12, v10
-; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v12
+; GISEL-NEXT: v_trunc_f32_e32 v14, v10
+; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v14
; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v1
-; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v13, v0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14
+; GISEL-NEXT: v_subb_u32_e32 v17, vcc, 0, v7, vcc
; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v16, v15, 0
+; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v12, v0
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_mov_b32_e32 v0, v11
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v12, v[0:1]
-; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v14, v9, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1]
+; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v9, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v15, v[0:1]
-; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v14, v9
-; GISEL-NEXT: v_mul_lo_u32 v9, v12, v10
-; GISEL-NEXT: v_mul_lo_u32 v14, v15, v0
+; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v13, v9
+; GISEL-NEXT: v_mul_lo_u32 v9, v14, v10
+; GISEL-NEXT: v_mul_lo_u32 v13, v15, v0
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v8
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v15, v10
+; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v15, v10
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14
+; GISEL-NEXT: v_mul_hi_u32 v10, v14, v10
+; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v11, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v13, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v13, s[6:7]
+; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v12, v5
; GISEL-NEXT: v_subbrev_u32_e64 v19, s[6:7], 0, v1, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v19, v8
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v8
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v14, v5
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v13, v5
; GISEL-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[6:7]
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v8, v19, v1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v14, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v18, v1
; GISEL-NEXT: v_mul_hi_u32 v18, v15, v0
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18
-; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v1
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v0, vcc
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v14, v0, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v10, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v16, v12, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v1, v5, v4
+; GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v12, v5, vcc
+; GISEL-NEXT: v_mov_b32_e32 v8, v1
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v16, v13, v[8:9]
+; GISEL-NEXT: v_ashrrev_i32_e32 v12, 31, v3
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v12
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v17, v10, v[8:9]
-; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v13, v2, v5
-; GISEL-NEXT: v_mul_lo_u32 v2, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT: v_xor_b32_e32 v14, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v12, v8
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v12, vcc
+; GISEL-NEXT: v_xor_b32_e32 v14, v1, v12
+; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v10, v8
+; GISEL-NEXT: v_xor_b32_e32 v15, v2, v12
+; GISEL-NEXT: v_mul_hi_u32 v2, v10, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v2, v13, v8
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
+; GISEL-NEXT: v_mul_hi_u32 v3, v10, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v3, v13, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v12, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v14, v0
-; GISEL-NEXT: v_mul_lo_u32 v8, v13, v2
-; GISEL-NEXT: v_mul_hi_u32 v9, v13, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v13, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v2, v15, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v14, v1
+; GISEL-NEXT: v_mul_hi_u32 v8, v14, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
; GISEL-NEXT: v_xor_b32_e32 v10, v11, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v9, v14, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v8, v3
-; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v8, v15, v1
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v14, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v14, v2
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v2
+; GISEL-NEXT: v_mul_hi_u32 v1, v15, v1
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v1, v0
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v10, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v11, v[8:9]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v13, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v14, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v14, v3
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v14, v2
+; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v7
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[4:5]
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v6
-; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v6
+; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v7
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v7
+; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v12
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v12
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64:
@@ -651,100 +671,106 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_xor_b32_e32 v1, v2, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v0
-; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v1, vcc
+; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v0
+; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc
; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; CGP-NEXT: v_trunc_f32_e32 v4, v3
; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v2
-; CGP-NEXT: v_cvt_u32_f32_e32 v14, v4
-; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
-; CGP-NEXT: v_mul_hi_u32 v15, v5, v2
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
-; CGP-NEXT: v_mul_lo_u32 v4, v14, v2
-; CGP-NEXT: v_mul_hi_u32 v2, v14, v2
-; CGP-NEXT: v_mul_lo_u32 v16, v5, v3
-; CGP-NEXT: v_mul_lo_u32 v17, v14, v3
+; CGP-NEXT: v_cvt_u32_f32_e32 v12, v2
+; CGP-NEXT: v_cvt_u32_f32_e32 v15, v4
+; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v12, 0
+; CGP-NEXT: v_mov_b32_e32 v4, v3
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v15, v[4:5]
+; CGP-NEXT: v_mul_hi_u32 v5, v12, v2
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v14, v12, v[3:4]
+; CGP-NEXT: v_mul_lo_u32 v4, v15, v2
+; CGP-NEXT: v_mul_hi_u32 v2, v15, v2
+; CGP-NEXT: v_mul_lo_u32 v16, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v17, v15, v3
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v16
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15
-; CGP-NEXT: v_mul_hi_u32 v15, v5, v3
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v12, v3
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v16, v4
; CGP-NEXT: v_add_i32_e32 v2, vcc, v17, v2
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
-; CGP-NEXT: v_mul_hi_u32 v3, v14, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT: v_mul_hi_u32 v3, v15, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v2
-; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v3, vcc
-; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
-; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; CGP-NEXT: v_mul_hi_u32 v15, v5, v2
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v12
-; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v12, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v4, v12
-; CGP-NEXT: v_mul_lo_u32 v4, v14, v2
-; CGP-NEXT: v_mul_lo_u32 v13, v5, v3
-; CGP-NEXT: v_mul_hi_u32 v2, v14, v2
-; CGP-NEXT: v_xor_b32_e32 v10, v10, v12
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v2
+; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v3, vcc
+; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v12, 0
+; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT: v_mov_b32_e32 v4, v3
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v15, v[4:5]
+; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v14, v12, v[3:4]
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v13
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, v11, v13, vcc
+; CGP-NEXT: v_xor_b32_e32 v10, v4, v13
+; CGP-NEXT: v_mul_lo_u32 v4, v15, v2
+; CGP-NEXT: v_mul_lo_u32 v11, v12, v3
+; CGP-NEXT: v_xor_b32_e32 v14, v5, v13
+; CGP-NEXT: v_mul_hi_u32 v5, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v2, v15, v2
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v15, v14, v3
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT: v_mul_hi_u32 v13, v5, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v15, v2
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; CGP-NEXT: v_mul_hi_u32 v3, v14, v3
+; CGP-NEXT: v_mul_lo_u32 v5, v15, v3
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT: v_mul_hi_u32 v11, v12, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_mul_hi_u32 v3, v15, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v14, v3, vcc
-; CGP-NEXT: v_mul_lo_u32 v4, v10, v2
-; CGP-NEXT: v_mul_lo_u32 v5, v11, v3
-; CGP-NEXT: v_mul_hi_u32 v13, v11, v2
-; CGP-NEXT: v_mul_hi_u32 v2, v10, v2
-; CGP-NEXT: v_mul_hi_u32 v14, v10, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v15, v3, vcc
+; CGP-NEXT: v_mul_lo_u32 v4, v14, v2
+; CGP-NEXT: v_mul_lo_u32 v5, v10, v3
+; CGP-NEXT: v_mul_hi_u32 v11, v10, v2
+; CGP-NEXT: v_mul_hi_u32 v2, v14, v2
+; CGP-NEXT: v_mul_hi_u32 v12, v14, v3
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v13, v10, v3
+; CGP-NEXT: v_mul_lo_u32 v11, v14, v3
; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v11, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v13, v2
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v5, v10, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v13, v5
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v2, v4
-; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v13, 0
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v2, v4
+; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v11, 0
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v13, v[3:4]
-; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v4
+; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT: v_mov_b32_e32 v4, v3
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v12, v[4:5]
+; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v11, v[3:4]
+; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v14, v3, vcc
+; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v14, v3
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0
@@ -755,24 +781,24 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v0
; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v1
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v0
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v11, v1
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
-; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v12
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v12
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v12
-; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v13
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v13
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc
; CGP-NEXT: ; implicit-def: $vgpr4
; CGP-NEXT: ; implicit-def: $vgpr10
; CGP-NEXT: .LBB2_2: ; %Flow1
@@ -820,100 +846,106 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_xor_b32_e32 v3, v4, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v5, v3
-; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v2
-; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v2
+; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4
-; CGP-NEXT: v_cvt_u32_f32_e32 v12, v6
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
-; CGP-NEXT: v_mul_lo_u32 v6, v12, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v12, v4
-; CGP-NEXT: v_mul_lo_u32 v14, v7, v5
-; CGP-NEXT: v_mul_lo_u32 v15, v12, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v10, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v13, v6
+; CGP-NEXT: ; implicit-def: $vgpr6_vgpr7
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; CGP-NEXT: v_mov_b32_e32 v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v13, v[6:7]
+; CGP-NEXT: v_mul_hi_u32 v7, v10, v4
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; CGP-NEXT: v_mul_lo_u32 v6, v13, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT: v_mul_lo_u32 v14, v10, v5
+; CGP-NEXT: v_mul_lo_u32 v15, v13, v5
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v14
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v10, v5
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, v14, v6
; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT: v_mul_hi_u32 v5, v12, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7
+; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v4
-; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
-; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v10
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v10, vcc
-; CGP-NEXT: v_xor_b32_e32 v9, v6, v10
-; CGP-NEXT: v_mul_lo_u32 v6, v12, v4
-; CGP-NEXT: v_mul_lo_u32 v11, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v4, v12, v4
-; CGP-NEXT: v_xor_b32_e32 v8, v8, v10
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v4
+; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v5, vcc
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; CGP-NEXT: ; implicit-def: $vgpr6_vgpr7
+; CGP-NEXT: v_mov_b32_e32 v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v13, v[6:7]
+; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v11
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v9, v11, vcc
+; CGP-NEXT: v_xor_b32_e32 v8, v6, v11
+; CGP-NEXT: v_mul_lo_u32 v6, v13, v4
+; CGP-NEXT: v_mul_lo_u32 v9, v10, v5
+; CGP-NEXT: v_xor_b32_e32 v12, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v7, v10, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v13, v12, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
-; CGP-NEXT: v_mul_hi_u32 v11, v7, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT: v_mul_hi_u32 v5, v12, v5
+; CGP-NEXT: v_mul_lo_u32 v7, v13, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_mul_hi_u32 v9, v10, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v5, v13, v5
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT: v_addc_u32_e32 v5, vcc, v12, v5, vcc
-; CGP-NEXT: v_mul_lo_u32 v6, v8, v4
-; CGP-NEXT: v_mul_lo_u32 v7, v9, v5
-; CGP-NEXT: v_mul_hi_u32 v11, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v8, v4
-; CGP-NEXT: v_mul_hi_u32 v12, v8, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, v13, v5, vcc
+; CGP-NEXT: v_mul_lo_u32 v6, v12, v4
+; CGP-NEXT: v_mul_lo_u32 v7, v8, v5
+; CGP-NEXT: v_mul_hi_u32 v9, v8, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v12, v4
+; CGP-NEXT: v_mul_hi_u32 v10, v12, v5
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v8, v5
+; CGP-NEXT: v_mul_lo_u32 v9, v12, v5
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT: v_mul_hi_u32 v7, v9, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v7, v8, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v6
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v11, 0
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v4, v6
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v11, v[5:6]
-; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v8, v5, vcc
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v8, v5
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v6
+; CGP-NEXT: ; implicit-def: $vgpr6_vgpr7
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT: v_mov_b32_e32 v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v10, v[6:7]
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
+; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2
@@ -924,24 +956,24 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_sub_i32_e32 v8, vcc, v4, v2
; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
-; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v11
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v11
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
; CGP-NEXT: ; implicit-def: $vgpr6
; CGP-NEXT: ; implicit-def: $vgpr8
; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -977,77 +1009,81 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v6, 0xfffff000
+; CHECK-NEXT: v_mov_b32_e32 v7, 0xfffff000
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; CHECK-NEXT: v_trunc_f32_e32 v4, v3
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3
-; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3
-; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v2
+; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v4
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_mul_hi_u32 v5, v6, v2
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_mul_lo_u32 v4, v8, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3
+; CHECK-NEXT: v_mul_lo_u32 v10, v8, v3
+; CHECK-NEXT: v_mul_hi_u32 v11, v6, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
-; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6
-; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3
-; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6
-; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v2
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v3, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT: v_xor_b32_e32 v4, v0, v5
+; CHECK-NEXT: v_mul_lo_u32 v0, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, v6, v3
+; CHECK-NEXT: v_xor_b32_e32 v9, v1, v5
+; CHECK-NEXT: v_mul_hi_u32 v1, v6, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
+; CHECK-NEXT: v_mul_lo_u32 v1, v8, v3
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT: v_mul_hi_u32 v7, v6, v3
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0
; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0
-; CHECK-NEXT: v_mov_b32_e32 v5, 0x1000
+; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
@@ -1062,37 +1098,39 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[2:3]
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5
+; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
; CHECK-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
+; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v4, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[4:5]
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6
-; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CHECK-NEXT: v_xor_b32_e32 v0, v0, v5
+; CHECK-NEXT: v_xor_b32_e32 v1, v1, v5
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = srem i64 %num, 4096
ret i64 %result
@@ -1105,6 +1143,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, 0xfffff000
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
@@ -1141,42 +1180,43 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v14
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_mov_b32_e32 v15, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; GISEL-NEXT: v_mul_lo_u32 v9, v17, v13
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v4, v[14:15]
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14
+; GISEL-NEXT: v_mul_lo_u32 v15, v4, v14
; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13
-; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT: v_mul_hi_u32 v15, v4, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v15, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v4, v14
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0
; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -1200,23 +1240,26 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v13
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_mov_b32_e32 v13, v1
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v15, v[13:14]
; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_mov_b32_e32 v0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
@@ -1242,95 +1285,99 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9
-; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
-; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
-; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
-; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v16, vcc
+; GISEL-NEXT: v_xor_b32_e32 v13, v7, v9
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v15, v5, vcc
+; GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, v[5:6]
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v7
+; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5
+; GISEL-NEXT: v_xor_b32_e32 v1, v12, v9
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v7
+; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5
+; GISEL-NEXT: v_mul_lo_u32 v3, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
-; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v6, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v5
+; GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, v[5:6]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_pow2k_denom:
@@ -1339,6 +1386,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; CGP-NEXT: v_mov_b32_e32 v6, 0xfffff000
+; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
@@ -1372,39 +1420,40 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; CGP-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; CGP-NEXT: ; implicit-def: $vgpr15_vgpr16
; CGP-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; CGP-NEXT: v_mov_b32_e32 v4, v14
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v4, v17, v13
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT: v_mul_lo_u32 v9, v16, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v16, v13
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_hi_u32 v9, v17, v13
-; CGP-NEXT: v_mul_lo_u32 v13, v17, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v15, v16, v14
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; CGP-NEXT: v_mov_b32_e32 v15, v14
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; CGP-NEXT: v_mul_lo_u32 v9, v17, v13
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v4, v[14:15]
+; CGP-NEXT: v_mul_lo_u32 v15, v4, v14
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT: v_mul_hi_u32 v15, v4, v13
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v15, v17, v14
+; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v4, v14
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; CGP-NEXT: v_xor_b32_e32 v18, v0, v9
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v17, v14
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v14
; CGP-NEXT: v_xor_b32_e32 v19, v1, v9
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v19, v0
; CGP-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -1428,24 +1477,27 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v13
+; CGP-NEXT: ; implicit-def: $vgpr13_vgpr14
+; CGP-NEXT: v_mov_b32_e32 v13, v1
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v15, v[13:14]
; CGP-NEXT: v_sub_i32_e32 v14, vcc, v18, v0
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
-; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
+; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; CGP-NEXT: v_mov_b32_e32 v0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
@@ -1470,95 +1522,99 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: ; implicit-def: $vgpr7_vgpr8
; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v16, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v5, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v17, vcc
-; CGP-NEXT: v_xor_b32_e32 v1, v10, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6]
-; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT: v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT: v_mul_lo_u32 v2, v8, v0
-; CGP-NEXT: v_mul_lo_u32 v6, v7, v5
-; CGP-NEXT: v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT: v_mul_hi_u32 v3, v7, v0
-; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT: v_mov_b32_e32 v7, v1
+; CGP-NEXT: v_xor_b32_e32 v13, v5, v9
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_cndmask_b32_e32 v12, v15, v17, vcc
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v10, v[5:6]
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT: v_xor_b32_e32 v8, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
+; CGP-NEXT: v_mul_lo_u32 v6, v10, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v12, v9
+; CGP-NEXT: v_xor_b32_e32 v12, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v3, v8, v5
+; CGP-NEXT: v_mul_lo_u32 v3, v11, v5
+; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT: v_mul_hi_u32 v6, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v10, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v8, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v11, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; CGP-NEXT: v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT: v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v9
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; CGP-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; CGP-NEXT: v_mul_lo_u32 v5, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v6, v8, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CGP-NEXT: v_mul_hi_u32 v9, v8, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
+; CGP-NEXT: v_mul_lo_u32 v9, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v3, v12, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v6, v8, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v7, v13, v2
+; CGP-NEXT: v_mul_hi_u32 v9, v12, v2
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT: ; implicit-def: $vgpr5_vgpr6
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT: v_mov_b32_e32 v5, v3
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, v[5:6]
+; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
@@ -1570,77 +1626,81 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; CHECK-NEXT: v_mov_b32_e32 v7, 0xffed2705
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; CHECK-NEXT: v_trunc_f32_e32 v4, v3
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
-; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3
-; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3
-; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v2
+; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v4
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_mul_hi_u32 v5, v6, v2
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_mul_lo_u32 v4, v8, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v9, v6, v3
+; CHECK-NEXT: v_mul_lo_u32 v10, v8, v3
+; CHECK-NEXT: v_mul_hi_u32 v11, v6, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
-; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
-; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6
-; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3
-; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6
-; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v2
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v3, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[4:5]
+; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v1
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v6, v[3:4]
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT: v_xor_b32_e32 v4, v0, v5
+; CHECK-NEXT: v_mul_lo_u32 v0, v8, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, v6, v3
+; CHECK-NEXT: v_xor_b32_e32 v9, v1, v5
+; CHECK-NEXT: v_mul_hi_u32 v1, v6, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, v8, v2
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
+; CHECK-NEXT: v_mul_lo_u32 v1, v8, v3
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT: v_mul_hi_u32 v7, v6, v3
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT: v_mul_hi_u32 v3, v8, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
-; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0
; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0
-; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
@@ -1655,37 +1715,39 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v7, v9, v1
-; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[2:3]
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5
+; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
; CHECK-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
+; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v4, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[4:5]
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6
-; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CHECK-NEXT: v_xor_b32_e32 v0, v0, v5
+; CHECK-NEXT: v_xor_b32_e32 v1, v1, v5
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = srem i64 %num, 1235195
ret i64 %result
@@ -1698,6 +1760,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
@@ -1734,42 +1797,43 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v14
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13
-; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_mov_b32_e32 v15, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; GISEL-NEXT: v_mul_lo_u32 v9, v17, v13
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v4, v[14:15]
; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14
+; GISEL-NEXT: v_mul_lo_u32 v15, v4, v14
; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13
-; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT: v_mul_hi_u32 v15, v4, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v15, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v4, v14
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_xor_b32_e32 v19, v1, v9
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0
; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -1793,23 +1857,26 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v13
+; GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GISEL-NEXT: v_mov_b32_e32 v13, v1
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v15, v[13:14]
; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v18, v0
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: v_mov_b32_e32 v0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
@@ -1835,95 +1902,99 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9
-; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
-; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
-; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
-; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v14, v16, vcc
+; GISEL-NEXT: v_xor_b32_e32 v13, v7, v9
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v15, v5, vcc
+; GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, v[5:6]
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v7
+; GISEL-NEXT: v_mul_lo_u32 v2, v11, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5
+; GISEL-NEXT: v_xor_b32_e32 v1, v12, v9
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v7
+; GISEL-NEXT: v_mul_hi_u32 v3, v10, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5
+; GISEL-NEXT: v_mul_lo_u32 v3, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v0, v11, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
-; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v6, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v5
+; GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, v[5:6]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_oddk_denom:
@@ -1932,6 +2003,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; CGP-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
@@ -1965,39 +2037,40 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
-; CGP-NEXT: v_add_i32_e32 v16, vcc, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v4, 0
+; CGP-NEXT: ; implicit-def: $vgpr15_vgpr16
; CGP-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc
-; CGP-NEXT: v_mov_b32_e32 v4, v14
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v4, v17, v13
-; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT: v_mul_lo_u32 v9, v16, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v16, v13
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_hi_u32 v9, v17, v13
-; CGP-NEXT: v_mul_lo_u32 v13, v17, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v15, v16, v14
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v9, v15
+; CGP-NEXT: v_mov_b32_e32 v15, v14
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[15:16]
+; CGP-NEXT: v_mul_lo_u32 v9, v17, v13
+; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v4, v[14:15]
+; CGP-NEXT: v_mul_lo_u32 v15, v4, v14
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT: v_mul_hi_u32 v15, v4, v13
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v15, v17, v14
+; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v4, v14
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v9
; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc
; CGP-NEXT: v_xor_b32_e32 v18, v0, v9
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v17, v14
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v16
+; CGP-NEXT: v_mul_hi_u32 v13, v17, v14
; CGP-NEXT: v_xor_b32_e32 v19, v1, v9
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v15, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v19, v0
; CGP-NEXT: v_mul_lo_u32 v14, v18, v1
@@ -2021,24 +2094,27 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v13
+; CGP-NEXT: ; implicit-def: $vgpr13_vgpr14
+; CGP-NEXT: v_mov_b32_e32 v13, v1
+; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v15, v[13:14]
; CGP-NEXT: v_sub_i32_e32 v14, vcc, v18, v0
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; CGP-NEXT: v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
-; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v19, v13
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; CGP-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
+; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; CGP-NEXT: v_mov_b32_e32 v0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v18, vcc
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
; CGP-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
@@ -2063,95 +2139,99 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v1
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v7, v1
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, v8, v0, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: ; implicit-def: $vgpr7_vgpr8
; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v16, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v5, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
-; CGP-NEXT: v_cndmask_b32_e32 v10, v15, v17, vcc
-; CGP-NEXT: v_xor_b32_e32 v1, v10, v9
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6]
-; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT: v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT: v_mul_lo_u32 v2, v8, v0
-; CGP-NEXT: v_mul_lo_u32 v6, v7, v5
-; CGP-NEXT: v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT: v_mul_hi_u32 v3, v7, v0
-; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT: v_mov_b32_e32 v7, v1
+; CGP-NEXT: v_xor_b32_e32 v13, v5, v9
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v11, v[7:8]
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_cndmask_b32_e32 v12, v15, v17, vcc
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v10, v[5:6]
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT: v_xor_b32_e32 v8, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
+; CGP-NEXT: v_mul_lo_u32 v6, v10, v5
+; CGP-NEXT: v_xor_b32_e32 v1, v12, v9
+; CGP-NEXT: v_xor_b32_e32 v12, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v3, v8, v5
+; CGP-NEXT: v_mul_lo_u32 v3, v11, v5
+; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT: v_mul_hi_u32 v6, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v6, v10, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v5, v8, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v11, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v0
-; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc
-; CGP-NEXT: v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT: v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v9
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v0
+; CGP-NEXT: v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; CGP-NEXT: v_mul_lo_u32 v5, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v6, v8, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v13, v9
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CGP-NEXT: v_mul_hi_u32 v9, v8, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
+; CGP-NEXT: v_mul_lo_u32 v9, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v3, v12, v3
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_hi_u32 v6, v8, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v7, v13, v2
+; CGP-NEXT: v_mul_hi_u32 v9, v12, v2
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT: ; implicit-def: $vgpr5_vgpr6
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT: v_mov_b32_e32 v5, v3
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, v[5:6]
+; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
@@ -2195,72 +2275,74 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
; CHECK-NEXT: v_trunc_f32_e32 v7, v5
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v7
-; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v2
+; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7
-; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mov_b32_e32 v2, v6
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
-; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT: v_mul_hi_u32 v7, v8, v5
+; CHECK-NEXT: ; implicit-def: $vgpr7_vgpr8
+; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v7, v6
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[7:8]
+; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v2, v[6:7]
+; CHECK-NEXT: v_mul_lo_u32 v7, v11, v5
; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT: v_mul_lo_u32 v12, v8, v6
+; CHECK-NEXT: v_mul_lo_u32 v12, v2, v6
; CHECK-NEXT: v_mul_lo_u32 v13, v11, v6
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v12
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v8, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT: v_mul_hi_u32 v8, v2, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v13, v5
; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8
+; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7
-; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v2
-; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v5, vcc
-; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT: v_mov_b32_e32 v2, v6
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
-; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v9
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v4, v9, vcc
-; CHECK-NEXT: v_xor_b32_e32 v7, v2, v9
-; CHECK-NEXT: v_mul_lo_u32 v2, v11, v5
-; CHECK-NEXT: v_mul_lo_u32 v4, v8, v6
-; CHECK-NEXT: v_xor_b32_e32 v10, v3, v9
-; CHECK-NEXT: v_mul_hi_u32 v3, v8, v5
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v6, vcc
+; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v2, 0
+; CHECK-NEXT: ; implicit-def: $vgpr7_vgpr8
+; CHECK-NEXT: v_mov_b32_e32 v7, v6
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[7:8]
+; CHECK-NEXT: v_ashrrev_i32_e32 v8, 31, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v2, v[6:7]
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT: v_xor_b32_e32 v7, v3, v8
+; CHECK-NEXT: v_mul_lo_u32 v3, v11, v5
+; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6
+; CHECK-NEXT: v_xor_b32_e32 v10, v4, v8
+; CHECK-NEXT: v_mul_hi_u32 v4, v2, v5
; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v3, v11, v6
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2
-; CHECK-NEXT: v_mul_hi_u32 v4, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT: v_mul_hi_u32 v5, v11, v6
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CHECK-NEXT: v_mul_hi_u32 v9, v2, v6
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v3, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v11, v4, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, v10, v2
; CHECK-NEXT: v_mul_lo_u32 v5, v7, v3
; CHECK-NEXT: v_mul_hi_u32 v6, v7, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v10, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, v10, v3
+; CHECK-NEXT: v_mul_hi_u32 v9, v10, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
@@ -2277,9 +2359,11 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v4
+; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v9, v[4:5]
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4]
; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v10, v3
@@ -2293,24 +2377,24 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v2, v0
; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v1
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v6, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
-; CHECK-NEXT: v_xor_b32_e32 v0, v0, v9
-; CHECK-NEXT: v_xor_b32_e32 v1, v1, v9
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v9
-; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CHECK-NEXT: v_xor_b32_e32 v0, v0, v8
+; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -2363,255 +2447,262 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v4
; GISEL-NEXT: v_trunc_f32_e32 v12, v10
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v12
-; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v12
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v11
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v16, v10
-; GISEL-NEXT: v_mul_hi_u32 v17, v13, v10
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
+; GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v4, 0
+; GISEL-NEXT: v_mov_b32_e32 v12, v11
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[12:13]
+; GISEL-NEXT: v_mul_hi_u32 v17, v4, v10
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v4, v[11:12]
+; GISEL-NEXT: v_mul_lo_u32 v12, v16, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10
-; GISEL-NEXT: v_mul_lo_u32 v12, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v17, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT: v_mul_hi_u32 v12, v13, v11
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13
+; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v4
-; GISEL-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v11
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v4, v10
+; GISEL-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v17, 0
+; GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
+; GISEL-NEXT: v_mov_b32_e32 v12, v11
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[12:13]
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v12, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[11:12]
+; GISEL-NEXT: v_xor_b32_e32 v13, v0, v4
; GISEL-NEXT: v_mul_lo_u32 v0, v16, v10
-; GISEL-NEXT: v_mul_lo_u32 v14, v13, v11
-; GISEL-NEXT: v_xor_b32_e32 v15, v1, v4
-; GISEL-NEXT: v_mul_hi_u32 v1, v13, v10
+; GISEL-NEXT: v_mul_lo_u32 v12, v17, v11
+; GISEL-NEXT: v_xor_b32_e32 v14, v1, v4
+; GISEL-NEXT: v_mul_hi_u32 v1, v17, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v16, v10
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT: v_mul_hi_u32 v14, v13, v11
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT: v_mul_hi_u32 v12, v17, v11
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v0
; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v16, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v13, v15, v10
-; GISEL-NEXT: v_mul_lo_u32 v14, v12, v11
+; GISEL-NEXT: v_mul_lo_u32 v12, v14, v10
+; GISEL-NEXT: v_mul_lo_u32 v15, v13, v11
; GISEL-NEXT: v_lshl_b64 v[0:1], v[8:9], v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v12, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v15, v10
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v14
+; GISEL-NEXT: v_mul_hi_u32 v6, v13, v10
+; GISEL-NEXT: v_mul_hi_u32 v10, v14, v10
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v15
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, v15, v11
+; GISEL-NEXT: v_mul_lo_u32 v8, v14, v11
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_mul_hi_u32 v9, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v9, v13, v11
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v8, v6
-; GISEL-NEXT: v_mul_hi_u32 v8, v15, v11
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v8, v6
+; GISEL-NEXT: v_mul_hi_u32 v8, v14, v11
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v8, v6
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v8, v6
; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v13, 0
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v5, v15, 0
; GISEL-NEXT: v_xor_b32_e32 v6, v0, v8
; GISEL-NEXT: v_xor_b32_e32 v8, v1, v8
-; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v6
-; GISEL-NEXT: v_cvt_f32_u32_e32 v16, v8
-; GISEL-NEXT: v_mov_b32_e32 v0, v10
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, v[0:1]
-; GISEL-NEXT: v_mac_f32_e32 v14, 0x4f800000, v16
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v14
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v13, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 0, v6
+; GISEL-NEXT: v_cvt_f32_u32_e32 v17, v6
+; GISEL-NEXT: v_cvt_f32_u32_e32 v18, v8
+; GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v16, v[11:12]
+; GISEL-NEXT: v_mac_f32_e32 v17, 0x4f800000, v18
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v17
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v15, v[0:1]
+; GISEL-NEXT: v_sub_i32_e32 v15, vcc, 0, v6
; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v10
; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v1
-; GISEL-NEXT: v_trunc_f32_e32 v13, v10
-; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v13
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v1
-; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT: v_subb_u32_e32 v17, vcc, 0, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v16, v14, 0
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v13, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v1, v13, v10
-; GISEL-NEXT: v_subb_u32_e64 v18, s[4:5], v15, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v17, v14, v[11:12]
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v15, v0
-; GISEL-NEXT: v_mul_lo_u32 v12, v14, v11
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v18, v7
+; GISEL-NEXT: v_trunc_f32_e32 v12, v10
+; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v12
+; GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v12
+; GISEL-NEXT: v_subb_u32_e32 v16, vcc, 0, v8, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v15, v1, 0
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v13, v9
+; GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
+; GISEL-NEXT: v_mov_b32_e32 v12, v11
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[12:13]
+; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], v14, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v16, v1, v[11:12]
+; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v14, v0
+; GISEL-NEXT: v_mul_lo_u32 v12, v17, v10
+; GISEL-NEXT: v_mul_lo_u32 v14, v1, v11
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v7
; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v0, v7, vcc
-; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v14, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v5
+; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v1, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT: v_mul_hi_u32 v10, v17, v10
+; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v9, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[6:7]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v9, v5
; GISEL-NEXT: v_subbrev_u32_e64 v19, s[6:7], 0, v0, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v19, v7
; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v0, v7, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v7
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v5
+; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v14, v5
; GISEL-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[6:7]
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v7, v19, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_hi_u32 v1, v13, v10
-; GISEL-NEXT: v_mul_lo_u32 v10, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_mul_hi_u32 v15, v14, v11
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_mul_lo_u32 v14, v17, v11
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v18, v0
+; GISEL-NEXT: v_mul_hi_u32 v18, v1, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18
+; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v16, v11, 0
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v1, v0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v15, v11, 0
+; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v17, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v16, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v5, v4
-; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v11, v[9:10]
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v18, v7, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v12, v2, v5
-; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v10, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v14, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GISEL-NEXT: v_ashrrev_i32_e32 v12, 31, v3
+; GISEL-NEXT: v_mov_b32_e32 v9, v1
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v14, v[9:10]
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v13, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v12
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v16, v11, v[9:10]
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v12, vcc
+; GISEL-NEXT: v_xor_b32_e32 v13, v1, v12
+; GISEL-NEXT: v_mul_lo_u32 v1, v14, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v11, v9
+; GISEL-NEXT: v_xor_b32_e32 v15, v2, v12
+; GISEL-NEXT: v_mul_hi_u32 v2, v11, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v2, v14, v9
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
+; GISEL-NEXT: v_mul_hi_u32 v3, v11, v9
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2
-; GISEL-NEXT: v_mul_hi_u32 v10, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10
-; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_mul_hi_u32 v3, v14, v9
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v14, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v2, v15, v0
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v1
+; GISEL-NEXT: v_mul_hi_u32 v9, v13, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
+; GISEL-NEXT: v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v15, v1
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v14, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v12, v2
-; GISEL-NEXT: v_mul_hi_u32 v10, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
-; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_mul_hi_u32 v3, v13, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v10
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, v14, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v9, v3
-; GISEL-NEXT: v_mul_hi_u32 v9, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v10, v14, v2
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v2
+; GISEL-NEXT: v_mul_hi_u32 v1, v15, v1
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v0
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v1, v0
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v9, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v4
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v7, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v11, v[9:10]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v14, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v14, v3
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v13, v2
+; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v15, v3, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v15, v3
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v8
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
-; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v6
-; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v6
+; GISEL-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8
+; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v12
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v12
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_pow2_shl_denom:
@@ -2647,72 +2738,74 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v12, v10
; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v12
-; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12
-; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT: v_mov_b32_e32 v4, v11
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v4, v16, v10
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT: v_mul_hi_u32 v12, v13, v10
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v4, 0
+; CGP-NEXT: v_mov_b32_e32 v12, v11
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[12:13]
+; CGP-NEXT: v_mul_hi_u32 v13, v4, v10
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v4, v[11:12]
+; CGP-NEXT: v_mul_lo_u32 v12, v16, v10
; CGP-NEXT: v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT: v_mul_lo_u32 v17, v13, v11
+; CGP-NEXT: v_mul_lo_u32 v17, v4, v11
; CGP-NEXT: v_mul_lo_u32 v18, v16, v11
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v17
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
-; CGP-NEXT: v_mul_hi_u32 v12, v13, v11
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v17, v4
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT: v_mul_hi_u32 v13, v4, v11
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
; CGP-NEXT: v_add_i32_e32 v10, vcc, v18, v10
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
+; CGP-NEXT: v_mul_hi_u32 v11, v16, v11
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
-; CGP-NEXT: v_mul_hi_u32 v11, v16, v11
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v4
-; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc
-; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT: v_mov_b32_e32 v4, v11
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
-; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v14
-; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v14, vcc
-; CGP-NEXT: v_xor_b32_e32 v12, v4, v14
-; CGP-NEXT: v_mul_lo_u32 v4, v16, v10
-; CGP-NEXT: v_mul_lo_u32 v9, v13, v11
-; CGP-NEXT: v_xor_b32_e32 v15, v8, v14
-; CGP-NEXT: v_mul_hi_u32 v8, v13, v10
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
+; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc
+; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v4, 0
+; CGP-NEXT: ; implicit-def: $vgpr12_vgpr13
+; CGP-NEXT: v_mov_b32_e32 v12, v11
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[12:13]
+; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v4, v[11:12]
+; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
+; CGP-NEXT: v_xor_b32_e32 v12, v8, v13
+; CGP-NEXT: v_mul_lo_u32 v8, v16, v10
+; CGP-NEXT: v_mul_lo_u32 v14, v4, v11
+; CGP-NEXT: v_xor_b32_e32 v15, v9, v13
+; CGP-NEXT: v_mul_hi_u32 v9, v4, v10
; CGP-NEXT: v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, v16, v11
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v9, v13, v11
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_hi_u32 v10, v16, v11
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v16, v11
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; CGP-NEXT: v_mul_hi_u32 v14, v4, v11
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT: v_mul_hi_u32 v11, v16, v11
; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT: v_addc_u32_e32 v8, vcc, v16, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT: v_addc_u32_e32 v8, vcc, v16, v9, vcc
; CGP-NEXT: v_mul_lo_u32 v9, v15, v4
; CGP-NEXT: v_mul_lo_u32 v10, v12, v8
; CGP-NEXT: v_mul_hi_u32 v11, v12, v4
; CGP-NEXT: v_mul_hi_u32 v4, v15, v4
-; CGP-NEXT: v_mul_hi_u32 v13, v15, v8
+; CGP-NEXT: v_mul_hi_u32 v14, v15, v8
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
@@ -2725,15 +2818,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v9
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v4
-; CGP-NEXT: v_mov_b32_e32 v4, v9
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5]
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v4, 0
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v10
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v14, v[10:11]
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v4, v[9:10]
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v8
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10]
; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v15, v9, vcc
; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v15, v9
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
@@ -2746,24 +2840,24 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_sub_i32_e32 v11, vcc, v4, v0
; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v9, vcc
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v0
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v1
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v1, vcc
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0
-; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v14
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v14
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v14, vcc
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v13
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v13
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc
; CGP-NEXT: ; implicit-def: $vgpr11_vgpr12
; CGP-NEXT: ; implicit-def: $vgpr8
; CGP-NEXT: .LBB8_2: ; %Flow1
@@ -2814,6 +2908,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v2
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
@@ -2821,92 +2916,95 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v6
; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v11, v4
+; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT: v_mov_b32_e32 v4, v9
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
-; CGP-NEXT: v_mul_lo_u32 v4, v6, v8
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT: v_mul_hi_u32 v10, v11, v8
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v4, 0
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[10:11]
+; CGP-NEXT: v_mul_hi_u32 v11, v4, v8
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, v[9:10]
+; CGP-NEXT: v_mul_lo_u32 v10, v6, v8
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT: v_mul_lo_u32 v14, v11, v9
+; CGP-NEXT: v_mul_lo_u32 v14, v4, v9
; CGP-NEXT: v_mul_lo_u32 v15, v6, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v4, v9
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10
; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10
-; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v4
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT: v_mov_b32_e32 v4, v9
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
-; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v12
-; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v12, vcc
-; CGP-NEXT: v_xor_b32_e32 v7, v4, v12
-; CGP-NEXT: v_mul_lo_u32 v4, v6, v8
-; CGP-NEXT: v_mul_lo_u32 v10, v11, v9
-; CGP-NEXT: v_xor_b32_e32 v13, v5, v12
-; CGP-NEXT: v_mul_hi_u32 v5, v11, v8
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v9, vcc
+; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v4, 0
+; CGP-NEXT: ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT: v_mov_b32_e32 v10, v9
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[10:11]
+; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v7
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, v[9:10]
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CGP-NEXT: v_xor_b32_e32 v10, v5, v11
+; CGP-NEXT: v_mul_lo_u32 v5, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v12, v4, v9
+; CGP-NEXT: v_xor_b32_e32 v13, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v7, v4, v8
; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v5, v6, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT: v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v6, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
+; CGP-NEXT: v_mul_hi_u32 v12, v4, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; CGP-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, v6, v7, vcc
; CGP-NEXT: v_mul_lo_u32 v6, v13, v4
-; CGP-NEXT: v_mul_lo_u32 v8, v7, v5
-; CGP-NEXT: v_mul_hi_u32 v9, v7, v4
+; CGP-NEXT: v_mul_lo_u32 v7, v10, v5
+; CGP-NEXT: v_mul_hi_u32 v8, v10, v4
; CGP-NEXT: v_mul_hi_u32 v4, v13, v4
-; CGP-NEXT: v_mul_hi_u32 v10, v13, v5
+; CGP-NEXT: v_mul_hi_u32 v9, v13, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v13, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT: v_mul_lo_u32 v8, v13, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT: v_mul_hi_u32 v7, v10, v5
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v4, v6
-; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v4, v6
+; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v6
+; CGP-NEXT: ; implicit-def: $vgpr6_vgpr7
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_mov_b32_e32 v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[6:7]
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6]
; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v13, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3
@@ -2921,11 +3019,11 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v3
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v3
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v5, v3, vcc
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
-; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
@@ -2933,10 +3031,10 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v12
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v12
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v11
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v11
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
; CGP-NEXT: ; implicit-def: $vgpr5
; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -3033,199 +3131,205 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v1
; GISEL-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GISEL-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v3
; GISEL-NEXT: v_trunc_f32_e32 v5, v5
; GISEL-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5
-; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v3
+; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT: v_mov_b32_e32 v3, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
-; GISEL-NEXT: v_mul_lo_u32 v3, v5, v7
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v7
+; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v3, 0
+; GISEL-NEXT: v_mov_b32_e32 v9, v8
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[9:10]
+; GISEL-NEXT: v_mul_hi_u32 v10, v3, v7
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v3, v[8:9]
+; GISEL-NEXT: v_mul_lo_u32 v9, v5, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT: v_mul_lo_u32 v13, v3, v8
; GISEL-NEXT: v_mul_lo_u32 v14, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v13, v3
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT: v_mul_hi_u32 v10, v3, v8
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v14, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v3
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT: v_mov_b32_e32 v3, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
-; GISEL-NEXT: v_mul_lo_u32 v3, v5, v7
-; GISEL-NEXT: v_and_b32_e32 v11, 0xffffff, v0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v0, v10, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v3, 0
+; GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GISEL-NEXT: v_mov_b32_e32 v9, v8
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[9:10]
+; GISEL-NEXT: v_mul_hi_u32 v11, v3, v7
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v3, v[8:9]
+; GISEL-NEXT: v_mul_lo_u32 v9, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT: v_mul_hi_u32 v10, v3, v8
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v8, v3
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, 0, v0
-; GISEL-NEXT: v_mul_lo_u32 v8, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v7
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT: v_mul_lo_u32 v8, 0, v7
+; GISEL-NEXT: v_mul_lo_u32 v9, v0, v5
; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, 0, v0
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT: v_mul_lo_u32 v8, 0, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v0, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT: v_mul_hi_u32 v8, v0, v5
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v10, 0, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v9, 0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v6
-; GISEL-NEXT: v_mul_hi_u32 v9, 0, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v8, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, v[0:1]
-; GISEL-NEXT: v_cvt_f32_u32_e32 v0, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7]
-; GISEL-NEXT: v_mac_f32_e32 v0, 0x4f800000, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v11, v5
-; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, v6, vcc
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v7
+; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v10, v[7:8]
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v9, v[6:7]
+; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v4
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v8
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
+; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v6, vcc
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0
-; GISEL-NEXT: v_trunc_f32_e32 v9, v4
-; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v9
-; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v0
-; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], 0, v3
-; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
-; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT: v_trunc_f32_e32 v7, v4
+; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v7
+; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v3
+; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v0, 0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v7
; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v6
-; GISEL-NEXT: v_mov_b32_e32 v0, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v0, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v0, v9, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v10, v5
-; GISEL-NEXT: v_mul_hi_u32 v15, v10, v4
+; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GISEL-NEXT: v_mul_hi_u32 v15, v0, v4
+; GISEL-NEXT: v_mov_b32_e32 v6, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[6:7]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v0, v[5:6]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v7, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v0, v5
; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v15
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v9, v5
-; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT: v_mul_hi_u32 v6, v10, v5
+; GISEL-NEXT: v_mul_lo_u32 v15, v12, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v0, v5
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v15, v7
+; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6
-; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
-; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v7, v1
-; GISEL-NEXT: v_mov_b32_e32 v0, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v0, 0
+; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v8, v1
+; GISEL-NEXT: v_mov_b32_e32 v6, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[6:7]
; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v15, v1
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v0, v[5:6]
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v7, -1, v16, vcc
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v1
; GISEL-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v13, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4
-; GISEL-NEXT: v_mul_lo_u32 v12, v10, v5
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v10, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v11, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v5
-; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v12, v10, v5
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v11, v4
+; GISEL-NEXT: v_mul_lo_u32 v10, v12, v4
+; GISEL-NEXT: v_mul_lo_u32 v11, v0, v5
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v15, v1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v1, v0, v4
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
-; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v4, v0
+; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v10, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v10, v12, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v12, v4
+; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v11, v1
+; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5
+; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v10, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT: v_mul_hi_u32 v5, v12, v5
+; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v4, v1
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v11, v4
+; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v10, v4
; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v5, v4
-; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v10, v0
-; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], v9, v4, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v5, 0, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v2, v4
+; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v1
+; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v12, v4, s[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v4, 0, v0
+; GISEL-NEXT: v_mul_lo_u32 v5, v2, v1
; GISEL-NEXT: v_cndmask_b32_e32 v10, v13, v6, vcc
; GISEL-NEXT: v_mul_hi_u32 v6, v2, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT: v_mul_lo_u32 v9, 0, v4
; GISEL-NEXT: v_mul_hi_u32 v0, 0, v0
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v2, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_lo_u32 v5, 0, v1
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v2, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v5
-; GISEL-NEXT: v_mul_hi_u32 v11, 0, v4
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, 0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v4
+; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v0
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v1, v0
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
; GISEL-NEXT: v_mov_b32_e32 v0, v5
; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v6, v[0:1]
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v7, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], 0, v11, v[5:6]
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], 0, v5, vcc
; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], 0, v5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
index 75148ecff5377..69504402892e7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
@@ -44,34 +44,35 @@ body: |
; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub0:vreg_64 = COPY [[COPY12]]
; CHECK-NEXT: [[COPY20:%[0-9]+]].sub1:vreg_64 = COPY [[COPY13]]
; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr14
- ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 1, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub0, 0, [[COPY22]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub1, 0, [[COPY22]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]].sub0, 0, [[DEF]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]].sub1, 0, [[DEF]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 2, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY22]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[COPY22]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[DEF]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[DEF]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 3, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[COPY22]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[COPY22]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[DEF]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[DEF]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 4, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[COPY22]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[COPY22]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[DEF]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[DEF]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 5, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[COPY22]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[COPY22]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[DEF]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[DEF]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 6, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[COPY22]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[COPY22]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[DEF]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[DEF]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 7, [[COPY21]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[COPY22]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[COPY22]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[DEF]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[DEF]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]]
; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index d9158e3558395..59f6df4b52162 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -645,10 +645,12 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v2, 8, v3 op_sel_hi:[0,1]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v1, 8, v6 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, v2, v1 clamp
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2
@@ -894,6 +896,8 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s2
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s0, s1 clamp
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
index ba5a8e9c68a1f..d4547b689b67b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
@@ -132,60 +132,64 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_trunc_f32_e32 v2, v1
; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX8-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX8-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v3, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1
+; GFX8-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v6, v3
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0
; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1
; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0
@@ -207,10 +211,12 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2]
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX8-NEXT: v_mov_b32_e32 v6, s9
-; GFX8-NEXT: v_mov_b32_e32 v5, s11
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v5, v[2:3]
+; GFX8-NEXT: v_mov_b32_e32 v3, s11
; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v0
; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
@@ -220,31 +226,31 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v6
-; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[0:1]
; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s10, v2
; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc
; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4
-; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v5, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8
; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7
-; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s10, v7
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s10, v7
; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1]
; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9
; GFX8-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1]
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v12, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc
; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v9, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v14, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v9, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, v4, s[0:1]
; GFX8-NEXT: v_mov_b32_e32 v4, s4
@@ -271,59 +277,63 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_trunc_f32_e32 v2, v1
; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX9-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8
; GFX9-NEXT: v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v7, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v1, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_mov_b32_e32 v7, s19
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v6, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v3, v4, v1
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v6, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v2, s17, v0
; GFX9-NEXT: v_mul_lo_u32 v3, s16, v1
; GFX9-NEXT: v_mul_hi_u32 v4, s16, v0
@@ -344,25 +354,27 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s18, v5, 0
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v3, v[1:2]
-; GFX9-NEXT: v_mov_b32_e32 v6, s17
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_add3_u32 v4, v3, v2, v6
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mov_b32_e32 v6, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[2:3]
+; GFX9-NEXT: v_mov_b32_e32 v3, s17
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s19, v5, v[1:2]
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s16, v0
-; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
-; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v6
+; GFX9-NEXT: v_subb_co_u32_e64 v3, s[0:1], v3, v1, vcc
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v3
; GFX9-NEXT: v_sub_u32_e32 v0, s17, v1
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v2
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1]
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v6
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s19, v3
; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v7, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[0:1]
; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s18, v2
; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc
; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5
-; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
+; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s19, v9
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s18, v8
@@ -379,18 +391,19 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc
; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1
; GFX9-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v10, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v7, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v15, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v5, s[0:1]
-; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[12:13]
-; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[14:15]
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1]
+; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[12:13]
+; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[14:15]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: udivrem_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s19
; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s18
@@ -400,61 +413,64 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX10-NEXT: v_trunc_f32_e32 v2, v1
-; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
-; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2
+; GFX10-NEXT: v_trunc_f32_e32 v4, v1
+; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v4
+; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v4
; GFX10-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s0, v3, 0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s0, v4, v[1:2]
-; GFX10-NEXT: s_subb_u32 s1, 0, s19
+; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s0, v5, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, s1, v3, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, s0, v4, v[2:3]
+; GFX10-NEXT: s_subb_u32 s1, 0, s19
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, s1, v5, v[1:2]
; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0
-; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX10-NEXT: v_mul_lo_u32 v3, v5, v1
; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v8, v5, v1
; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT: v_add_co_u32 v2, s2, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s2
+; GFX10-NEXT: v_add_co_u32 v2, s2, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
; GFX10-NEXT: v_add_co_u32 v6, s2, v7, v6
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s2
; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX10-NEXT: v_add_co_u32 v2, s2, v6, v8
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s2
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v7, v6
; GFX10-NEXT: v_add_co_u32 v0, s2, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0
-; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v5, v0
+; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s0, v4, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v5, 0
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s1, v3, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s0, v4, v[2:3]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s1, v5, v[1:2]
; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0
-; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX10-NEXT: v_mul_lo_u32 v3, v5, v1
; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1
+; GFX10-NEXT: v_mul_hi_u32 v8, v5, v1
; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v5
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v6
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v8
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v7, v6
; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0
-; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v5, v0
+; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1
; GFX10-NEXT: v_mul_lo_u32 v2, s17, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo
; GFX10-NEXT: v_mul_hi_u32 v4, s16, v0
@@ -471,16 +487,18 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2
; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v3
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: v_mul_hi_u32 v6, s17, v1
; GFX10-NEXT: v_add_co_u32 v5, s0, v0, v2
-; GFX10-NEXT: v_mul_hi_u32 v2, s17, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v3
+; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s18, v5, 0
-; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s18, v3, v[1:2]
+; GFX10-NEXT: v_add3_u32 v4, v4, v7, v6
+; GFX10-NEXT: v_mov_b32_e32 v2, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s18, v4, v[2:3]
; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s19, v5, v[1:2]
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1
-; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s16, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v6, s17, v1
; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s0, s17, v1, vcc_lo
@@ -497,7 +515,7 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_cmp_le_u32_e64 s0, s19, v9
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0
; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1
-; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v3, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s19, v9
; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s19, v8
@@ -507,13 +525,13 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v0, s0, 0, v0, s0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo
; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v6, v10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v14, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v0, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v5, v2, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v1, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v4, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v4, v1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v3, s0
; GFX10-NEXT: v_cndmask_b32_e64 v3, v8, v6, s0
; GFX10-NEXT: global_store_dwordx2 v9, v[0:1], s[12:13]
; GFX10-NEXT: global_store_dwordx2 v9, v[2:3], s[14:15]
@@ -1010,66 +1028,71 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_trunc_f32_e32 v2, v1
; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX8-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX8-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v7, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX8-NEXT: v_mul_hi_u32 v6, v4, v0
; GFX8-NEXT: s_sub_u32 s2, 0, s14
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX8-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX8-NEXT: v_mul_lo_u32 v3, v4, v1
; GFX8-NEXT: s_subb_u32 s3, 0, s15
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1
+; GFX8-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
+; GFX8-NEXT: v_mul_hi_u32 v3, v4, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0
; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5
-; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v6, v3
+; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0
; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1
; GFX8-NEXT: v_mul_hi_u32 v4, s8, v0
; GFX8-NEXT: v_mul_hi_u32 v0, s9, v0
+; GFX8-NEXT: v_mov_b32_e32 v5, s13
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
@@ -1088,11 +1111,12 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v4, v2
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v7, v[1:2]
-; GFX8-NEXT: v_mov_b32_e32 v3, s9
+; GFX8-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s8, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s12, v7, v[2:3]
+; GFX8-NEXT: v_mov_b32_e32 v3, s9
; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s13, v6, v[1:2]
-; GFX8-NEXT: v_mov_b32_e32 v4, s13
; GFX8-NEXT: v_subb_u32_e64 v0, s[0:1], v3, v1, vcc
; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s9, v1
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v0
@@ -1103,94 +1127,97 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_cndmask_b32_e64 v9, v2, v3, s[0:1]
; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s15
; GFX8-NEXT: v_cvt_f32_u32_e32 v3, s14
-; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v4, vcc
+; GFX8-NEXT: v_subb_u32_e32 v10, vcc, v1, v5, vcc
; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f800000, v2
; GFX8-NEXT: v_add_f32_e32 v1, v1, v3
; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s12, v8
-; GFX8-NEXT: v_subbrev_u32_e64 v11, s[0:1], 0, v5, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s12, v8
+; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v10, vcc
; GFX8-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GFX8-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GFX8-NEXT: v_trunc_f32_e32 v3, v2
; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v3
; GFX8-NEXT: v_add_f32_e32 v1, v2, v1
-; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v1
-; GFX8-NEXT: v_add_u32_e64 v13, s[0:1], 1, v6
-; GFX8-NEXT: v_addc_u32_e64 v14, s[0:1], 0, v7, s[0:1]
-; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v12, 0
-; GFX8-NEXT: v_cvt_u32_f32_e32 v15, v3
-; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v11
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX8-NEXT: v_subb_u32_e32 v4, vcc, v5, v4, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v15, v[2:3]
-; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v10
+; GFX8-NEXT: v_cvt_u32_f32_e32 v13, v1
+; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v6
+; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v7, s[0:1]
+; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v13, 0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v16, v3
+; GFX8-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s13, v12
+; GFX8-NEXT: v_mov_b32_e32 v3, v2
; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1]
-; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v12, v[2:3]
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v11
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1]
-; GFX8-NEXT: v_mul_lo_u32 v3, v15, v1
-; GFX8-NEXT: v_mul_lo_u32 v17, v12, v2
-; GFX8-NEXT: v_mul_hi_u32 v5, v12, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v15, v1
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v17
-; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5
+; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v16, v[3:4]
+; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s12, v11
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1]
+; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s3, v13, v[2:3]
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v12
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v4, s[0:1]
+; GFX8-NEXT: v_mul_lo_u32 v3, v16, v1
+; GFX8-NEXT: v_mul_lo_u32 v4, v13, v2
+; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v10, v5, vcc
+; GFX8-NEXT: v_mul_hi_u32 v10, v13, v1
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v10
; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v5, v15, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v17, v3
-; GFX8-NEXT: v_mul_hi_u32 v17, v12, v2
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v5, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v17
-; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v17
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, 1, v13
-; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v14, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, s12, v10
-; GFX8-NEXT: v_mul_hi_u32 v2, v15, v2
-; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v4, vcc
+; GFX8-NEXT: v_mul_lo_u32 v10, v16, v2
+; GFX8-NEXT: v_mul_hi_u32 v1, v16, v1
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3
+; GFX8-NEXT: v_mul_hi_u32 v4, v13, v2
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v10, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v10, v4
+; GFX8-NEXT: v_add_u32_e32 v10, vcc, 1, v14
+; GFX8-NEXT: v_addc_u32_e32 v18, vcc, 0, v15, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, s12, v11
+; GFX8-NEXT: v_mul_hi_u32 v2, v16, v2
+; GFX8-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v5, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v1
-; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v12, 0
-; GFX8-NEXT: v_addc_u32_e32 v15, vcc, v15, v2, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v13, v17, vcc
+; GFX8-NEXT: v_add_u32_e32 v13, vcc, v13, v1
+; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v13, 0
+; GFX8-NEXT: v_addc_u32_e32 v16, vcc, v16, v2, vcc
+; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17
; GFX8-NEXT: v_mov_b32_e32 v1, v4
-; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v15, v[1:2]
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v14, v18, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v16, v[1:2]
+; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v10, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v18, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v13, v[4:5]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9
-; GFX8-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v12, v[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v2, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v13, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v10, v19, vcc
-; GFX8-NEXT: v_mul_lo_u32 v7, v15, v3
-; GFX8-NEXT: v_mul_lo_u32 v9, v12, v4
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, v14, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e32 v5, v11, v19, vcc
+; GFX8-NEXT: v_mul_lo_u32 v7, v16, v3
+; GFX8-NEXT: v_mul_lo_u32 v9, v13, v4
; GFX8-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX8-NEXT: v_mul_hi_u32 v8, v12, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v6, v11, v20, vcc
+; GFX8-NEXT: v_mul_hi_u32 v8, v13, v3
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v10, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e32 v6, v12, v20, vcc
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9
; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_mul_lo_u32 v8, v15, v4
-; GFX8-NEXT: v_mul_hi_u32 v3, v15, v3
+; GFX8-NEXT: v_mul_lo_u32 v8, v16, v4
+; GFX8-NEXT: v_mul_hi_u32 v3, v16, v3
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT: v_mul_hi_u32 v9, v12, v4
+; GFX8-NEXT: v_mul_hi_u32 v9, v13, v4
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v8, v3
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v9
; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9
-; GFX8-NEXT: v_mul_hi_u32 v4, v15, v4
+; GFX8-NEXT: v_mul_hi_u32 v4, v16, v4
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v12, v3
-; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v13, v3
+; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v16, v4, vcc
; GFX8-NEXT: v_mul_lo_u32 v7, s11, v3
; GFX8-NEXT: v_mul_lo_u32 v8, s10, v4
; GFX8-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1]
@@ -1208,54 +1235,55 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT: v_add_u32_e32 v9, vcc, v3, v0
-; GFX8-NEXT: v_mul_hi_u32 v8, s11, v4
-; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v9, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT: v_add_u32_e32 v10, vcc, v8, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, v4
-; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v10, v[0:1]
-; GFX8-NEXT: v_mov_b32_e32 v4, s11
-; GFX8-NEXT: v_mov_b32_e32 v0, s15
-; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v9, v[7:8]
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0
+; GFX8-NEXT: v_mul_hi_u32 v9, s11, v4
+; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s14, v0, 0
+; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
+; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v7
+; GFX8-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX8-NEXT: v_mov_b32_e32 v10, s11
+; GFX8-NEXT: v_mov_b32_e32 v7, v4
+; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s14, v9, v[7:8]
+; GFX8-NEXT: v_mov_b32_e32 v4, s15
+; GFX8-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s15, v0, v[7:8]
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, s10, v3
-; GFX8-NEXT: v_subb_u32_e64 v11, s[0:1], v4, v7, vcc
+; GFX8-NEXT: v_subb_u32_e64 v10, s[0:1], v10, v7, vcc
; GFX8-NEXT: v_sub_u32_e64 v3, s[0:1], s11, v7
-; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v11
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v8
+; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v10
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v11
-; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v0, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s14, v8
+; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v8
+; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v10
+; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[0:1]
+; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s14, v8
; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v3, vcc
; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s15, v12
; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v7
+; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s14, v11
; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1]
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s15, v12
; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1]
-; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v9
-; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v3, v0, vcc
-; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v10, s[0:1]
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v14
+; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v0
+; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v9, s[0:1]
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v14
; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
-; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s14, v7
-; GFX8-NEXT: v_subbrev_u32_e64 v0, s[0:1], 0, v0, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e32 v3, v14, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, v14, s[0:1]
+; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s14, v11
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc
+; GFX8-NEXT: v_subbrev_u32_e64 v14, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v16, vcc
+; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v0, v4, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e64 v4, v9, v15, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v11, v13, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v0, s[0:1]
+; GFX8-NEXT: v_cndmask_b32_e64 v8, v10, v9, s[0:1]
; GFX8-NEXT: v_mov_b32_e32 v10, s5
-; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
; GFX8-NEXT: v_mov_b32_e32 v9, s4
-; GFX8-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v8, v11, v0, s[0:1]
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[1:4]
; GFX8-NEXT: v_mov_b32_e32 v0, s6
; GFX8-NEXT: v_mov_b32_e32 v1, s7
@@ -1279,60 +1307,64 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_trunc_f32_e32 v2, v1
; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2
; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1
-; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1
-; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v0
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v2
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v0
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v7, v5, v1
+; GFX9-NEXT: v_mul_hi_u32 v8, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8
; GFX9-NEXT: v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v7, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v1, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v4, 0
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s2, v5, v[2:3]
+; GFX9-NEXT: v_mul_hi_u32 v6, v4, v0
; GFX9-NEXT: s_sub_u32 s2, 0, s6
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
-; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0
-; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
+; GFX9-NEXT: v_mul_lo_u32 v2, v5, v0
+; GFX9-NEXT: v_mul_hi_u32 v0, v5, v0
+; GFX9-NEXT: v_mul_lo_u32 v3, v4, v1
; GFX9-NEXT: s_subb_u32 s3, 0, s7
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1
+; GFX9-NEXT: v_mul_lo_u32 v6, v5, v1
+; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_hi_u32 v3, v4, v1
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
+; GFX9-NEXT: v_add_u32_e32 v3, v6, v3
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v2, s17, v0
; GFX9-NEXT: v_mul_lo_u32 v3, s16, v1
; GFX9-NEXT: v_mul_hi_u32 v4, s16, v0
@@ -1345,6 +1377,7 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_mul_lo_u32 v4, s17, v1
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_hi_u32 v3, s16, v1
+; GFX9-NEXT: v_mov_b32_e32 v6, s5
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
@@ -1354,9 +1387,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
; GFX9-NEXT: v_add3_u32 v8, v3, v2, v5
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s4, v8, v[1:2]
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_mov_b32_e32 v4, s17
-; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s4, v8, v[2:3]
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v7, v[1:2]
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s16, v0
; GFX9-NEXT: v_subb_co_u32_e64 v0, s[0:1], v4, v2, vcc
@@ -1369,81 +1403,84 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s7
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s6
; GFX9-NEXT: v_sub_u32_e32 v2, s17, v2
-; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v2, v5, vcc
+; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, v2, v6, vcc
; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT: v_add_f32_e32 v2, v2, v4
; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s4, v1
-; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v6, vcc
+; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s4, v1
+; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v10, vcc
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v4, v3
; GFX9-NEXT: v_mul_f32_e32 v3, 0xcf800000, v4
; GFX9-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v2
-; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v7
-; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v8, s[0:1]
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v12, 0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v15, v4
-; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s5, v11
-; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v15, v[3:4]
-; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v10
+; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v2
+; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v7
+; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v8, s[0:1]
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v13, 0
+; GFX9-NEXT: v_cvt_u32_f32_e32 v16, v4
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s5, v12
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[0:1]
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v12, v[3:4]
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s5, v11
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[0:1]
-; GFX9-NEXT: v_mul_lo_u32 v4, v15, v2
-; GFX9-NEXT: v_mul_lo_u32 v17, v12, v3
-; GFX9-NEXT: v_mul_hi_u32 v6, v12, v2
-; GFX9-NEXT: v_mul_hi_u32 v2, v15, v2
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v17
-; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v16, v[4:5]
+; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v11
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s3, v13, v[3:4]
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s5, v12
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v17, v5, s[0:1]
+; GFX9-NEXT: v_mul_lo_u32 v4, v16, v2
+; GFX9-NEXT: v_mul_lo_u32 v5, v13, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v10, v6, vcc
+; GFX9-NEXT: v_mul_hi_u32 v10, v13, v2
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v10
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v15, v3
-; GFX9-NEXT: v_add_u32_e32 v4, v17, v4
-; GFX9-NEXT: v_mul_hi_u32 v17, v12, v3
-; GFX9-NEXT: v_mul_hi_u32 v3, v15, v3
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v17
-; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GFX9-NEXT: v_add_u32_e32 v6, v6, v17
-; GFX9-NEXT: v_add_co_u32_e32 v17, vcc, 1, v13
-; GFX9-NEXT: v_addc_co_u32_e32 v18, vcc, 0, v14, vcc
-; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s4, v10
-; GFX9-NEXT: v_subbrev_co_u32_e32 v20, vcc, 0, v5, vcc
+; GFX9-NEXT: v_mul_lo_u32 v10, v16, v3
+; GFX9-NEXT: v_mul_hi_u32 v2, v16, v2
+; GFX9-NEXT: v_add_u32_e32 v4, v5, v4
+; GFX9-NEXT: v_mul_hi_u32 v5, v13, v3
+; GFX9-NEXT: v_mul_hi_u32 v3, v16, v3
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-NEXT: v_add_u32_e32 v5, v10, v5
+; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, 1, v14
+; GFX9-NEXT: v_addc_co_u32_e32 v18, vcc, 0, v15, vcc
+; GFX9-NEXT: v_subrev_co_u32_e32 v19, vcc, s4, v11
+; GFX9-NEXT: v_subbrev_co_u32_e32 v20, vcc, 0, v6, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v2
-; GFX9-NEXT: v_add3_u32 v3, v6, v4, v3
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v12, 0
-; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v15, v3, vcc
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v13, v17, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v2
+; GFX9-NEXT: v_add3_u32 v3, v5, v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s2, v13, 0
+; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, v16, v3, vcc
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17
; GFX9-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v15, v[2:3]
-; GFX9-NEXT: v_cndmask_b32_e32 v13, v14, v18, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[0:1], s2, v16, v[2:3]
+; GFX9-NEXT: v_cndmask_b32_e32 v10, v14, v10, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v18, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s3, v13, v[5:6]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9
-; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s3, v12, v[5:6]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v3, s[0:1]
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v13, s[0:1]
-; GFX9-NEXT: v_mul_lo_u32 v7, v15, v4
-; GFX9-NEXT: v_mul_lo_u32 v8, v12, v5
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v19, vcc
-; GFX9-NEXT: v_mul_hi_u32 v10, v12, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v20, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v10, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v14, s[0:1]
+; GFX9-NEXT: v_mul_lo_u32 v7, v16, v4
+; GFX9-NEXT: v_mul_lo_u32 v8, v13, v5
+; GFX9-NEXT: v_mul_hi_u32 v10, v13, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v11, v19, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v9, v12, v20, vcc
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v10, v15, v5
-; GFX9-NEXT: v_mul_hi_u32 v4, v15, v4
+; GFX9-NEXT: v_mul_lo_u32 v10, v16, v5
+; GFX9-NEXT: v_mul_hi_u32 v4, v16, v4
; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
-; GFX9-NEXT: v_mul_hi_u32 v8, v12, v5
-; GFX9-NEXT: v_mul_hi_u32 v5, v15, v5
+; GFX9-NEXT: v_mul_hi_u32 v8, v13, v5
+; GFX9-NEXT: v_mul_hi_u32 v5, v16, v5
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v10, v4
; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v8
@@ -1452,8 +1489,8 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_add_u32_e32 v8, v10, v8
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GFX9-NEXT: v_add3_u32 v5, v8, v7, v5
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v15, v5, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v16, v5, vcc
; GFX9-NEXT: v_mul_lo_u32 v7, s19, v4
; GFX9-NEXT: v_mul_lo_u32 v8, s18, v5
; GFX9-NEXT: v_cndmask_b32_e64 v6, v1, v6, s[0:1]
@@ -1477,9 +1514,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX9-NEXT: v_cndmask_b32_e64 v7, v0, v9, s[0:1]
; GFX9-NEXT: v_add_u32_e32 v0, v10, v8
; GFX9-NEXT: v_add3_u32 v8, v0, v1, v12
+; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-NEXT: v_mov_b32_e32 v9, s19
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v8, v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v9, s19
; GFX9-NEXT: v_mov_b32_e32 v5, s7
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s7, v11, v[0:1]
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s18, v4
@@ -1526,6 +1564,7 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x20
+; GFX10-NEXT: ; meta instruction
; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s5
@@ -1546,120 +1585,127 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX10-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GFX10-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
; GFX10-NEXT: v_mul_f32_e32 v3, 0x2f800000, v1
-; GFX10-NEXT: v_trunc_f32_e32 v4, v2
-; GFX10-NEXT: v_trunc_f32_e32 v5, v3
-; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v4
-; GFX10-NEXT: v_mul_f32_e32 v3, 0xcf800000, v5
-; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v4
-; GFX10-NEXT: v_cvt_u32_f32_e32 v10, v5
+; GFX10-NEXT: v_trunc_f32_e32 v6, v2
+; GFX10-NEXT: v_trunc_f32_e32 v8, v3
+; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v6
+; GFX10-NEXT: v_mul_f32_e32 v3, 0xcf800000, v8
+; GFX10-NEXT: v_cvt_u32_f32_e32 v11, v6
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v8
; GFX10-NEXT: v_add_f32_e32 v0, v2, v0
; GFX10-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s1, v7, 0
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s3, v8, 0
-; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s1, v9, v[1:2]
-; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s3, v10, v[3:4]
-; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s2, v7, v[4:5]
-; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v8, v[5:6]
-; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2
-; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2
-; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2
-; GFX10-NEXT: v_mul_lo_u32 v12, v7, v3
-; GFX10-NEXT: v_mul_lo_u32 v13, v9, v3
-; GFX10-NEXT: v_mul_hi_u32 v14, v7, v3
-; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0
-; GFX10-NEXT: v_mul_lo_u32 v16, v10, v0
-; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0
-; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3
-; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0
-; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v12
+; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v10, v1
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s1, v9, 0
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s3, v10, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, v1
+; GFX10-NEXT: v_mov_b32_e32 v6, v3
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s1, v11, v[4:5]
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s3, v8, v[6:7]
+; GFX10-NEXT: v_mul_lo_u32 v7, v11, v0
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s2, v9, v[3:4]
+; GFX10-NEXT: v_mul_hi_u32 v3, v9, v0
+; GFX10-NEXT: v_mul_hi_u32 v6, v11, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v10, v[4:5]
+; GFX10-NEXT: v_mul_lo_u32 v12, v9, v5
+; GFX10-NEXT: v_mul_lo_u32 v13, v11, v5
+; GFX10-NEXT: v_mul_lo_u32 v1, v8, v2
+; GFX10-NEXT: v_mul_hi_u32 v4, v10, v2
+; GFX10-NEXT: v_mul_hi_u32 v2, v8, v2
+; GFX10-NEXT: v_mul_hi_u32 v14, v9, v5
+; GFX10-NEXT: v_mul_lo_u32 v15, v10, v0
+; GFX10-NEXT: v_mul_lo_u32 v16, v8, v0
+; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v12
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v11, s0, v13, v11
+; GFX10-NEXT: v_add_co_u32 v6, s0, v13, v6
; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v15
; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v2, s0, v16, v2
; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v6, s0, v11, v14
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v5
+; GFX10-NEXT: v_add_co_u32 v3, s0, v7, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v14
+; GFX10-NEXT: v_mul_hi_u32 v17, v10, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v4
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v12, v3
+; GFX10-NEXT: v_mul_hi_u32 v5, v11, v5
+; GFX10-NEXT: v_mul_hi_u32 v0, v8, v0
; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v17
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1
-; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v3, s0, v6, v3
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11
; GFX10-NEXT: v_add_co_u32 v1, s0, v2, v1
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v7, v13, v7
+; GFX10-NEXT: v_add_nc_u32_e32 v4, v16, v4
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3
-; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v7, v4
-; GFX10-NEXT: v_add_co_u32 v8, s0, v8, v1
-; GFX10-NEXT: v_add3_u32 v2, v5, v2, v0
-; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s11, s1, v7, 0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v10, vcc_lo, v10, v2, s0
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s3, v8, 0
-; GFX10-NEXT: v_mul_hi_u32 v11, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s1, v9, v[1:2]
-; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s3, v10, v[3:4]
-; GFX10-NEXT: v_mul_lo_u32 v6, v9, v0
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s2, v7, v[4:5]
-; GFX10-NEXT: v_mul_hi_u32 v4, v7, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v8, v[5:6]
-; GFX10-NEXT: v_mul_lo_u32 v1, v10, v2
-; GFX10-NEXT: v_mul_hi_u32 v5, v8, v2
-; GFX10-NEXT: v_mul_hi_u32 v2, v10, v2
-; GFX10-NEXT: v_mul_lo_u32 v12, v7, v3
-; GFX10-NEXT: v_mul_lo_u32 v13, v9, v3
-; GFX10-NEXT: v_mul_hi_u32 v14, v7, v3
-; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0
-; GFX10-NEXT: v_mul_lo_u32 v16, v10, v0
-; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0
-; GFX10-NEXT: v_mul_hi_u32 v3, v9, v3
-; GFX10-NEXT: v_mul_hi_u32 v0, v10, v0
-; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v12
+; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v9, v3
+; GFX10-NEXT: v_add3_u32 v5, v7, v6, v5
+; GFX10-NEXT: v_add_co_u32 v10, s0, v10, v1
+; GFX10-NEXT: v_add3_u32 v2, v4, v2, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s11, s1, v9, 0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v11, v5, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e64 v8, vcc_lo, v8, v2, s0
+; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s3, v10, 0
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX10-NEXT: v_mov_b32_e32 v4, v1
+; GFX10-NEXT: v_mov_b32_e32 v6, v3
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s1, v11, v[4:5]
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s3, v8, v[6:7]
+; GFX10-NEXT: v_mul_lo_u32 v7, v11, v0
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s2, v9, v[3:4]
+; GFX10-NEXT: v_mul_hi_u32 v3, v9, v0
+; GFX10-NEXT: v_mul_hi_u32 v6, v11, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v10, v[4:5]
+; GFX10-NEXT: v_mul_lo_u32 v12, v9, v5
+; GFX10-NEXT: v_mul_lo_u32 v13, v11, v5
+; GFX10-NEXT: v_mul_lo_u32 v1, v8, v2
+; GFX10-NEXT: v_mul_hi_u32 v4, v10, v2
+; GFX10-NEXT: v_mul_hi_u32 v2, v8, v2
+; GFX10-NEXT: v_mul_hi_u32 v14, v9, v5
+; GFX10-NEXT: v_mul_lo_u32 v15, v10, v0
+; GFX10-NEXT: v_mul_lo_u32 v16, v8, v0
+; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v12
; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v11, s0, v13, v11
+; GFX10-NEXT: v_add_co_u32 v6, s0, v13, v6
; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v15
; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v2, s0, v16, v2
; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v6, s0, v11, v14
-; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v5
+; GFX10-NEXT: v_add_co_u32 v3, s0, v7, v3
+; GFX10-NEXT: v_mul_hi_u32 v17, v10, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v14
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v4
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v3, v12, v3
; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v17
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX10-NEXT: v_mul_hi_u32 v5, v11, v5
; GFX10-NEXT: v_add_nc_u32_e32 v1, v15, v1
-; GFX10-NEXT: v_add_co_u32 v4, s0, v6, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v3, s0, v6, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v7, v13, v7
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
+; GFX10-NEXT: v_mul_hi_u32 v0, v8, v0
; GFX10-NEXT: v_add_co_u32 v1, s0, v2, v1
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v16, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v4, v16, v4
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT: v_add3_u32 v3, v11, v6, v3
-; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v7, v4
-; GFX10-NEXT: v_add_co_u32 v1, s0, v8, v1
-; GFX10-NEXT: v_add3_u32 v0, v5, v2, v0
-; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v9, v3, vcc_lo
-; GFX10-NEXT: v_mul_lo_u32 v3, s17, v4
-; GFX10-NEXT: v_mul_hi_u32 v5, s16, v4
-; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v10, v0, s0
+; GFX10-NEXT: v_add3_u32 v5, v7, v6, v5
+; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v9, v3
+; GFX10-NEXT: v_add_co_u32 v1, s0, v10, v1
+; GFX10-NEXT: v_add3_u32 v0, v4, v2, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v11, v5, vcc_lo
+; GFX10-NEXT: v_mul_lo_u32 v4, s17, v3
+; GFX10-NEXT: v_mul_hi_u32 v5, s16, v3
+; GFX10-NEXT: v_add_co_ci_u32_e64 v0, vcc_lo, v8, v0, s0
; GFX10-NEXT: v_mul_lo_u32 v8, s16, v2
-; GFX10-NEXT: v_mul_hi_u32 v4, s17, v4
+; GFX10-NEXT: v_mul_hi_u32 v3, s17, v3
; GFX10-NEXT: v_mul_lo_u32 v9, s17, v2
; GFX10-NEXT: v_mul_lo_u32 v6, s19, v1
; GFX10-NEXT: v_mul_hi_u32 v10, s16, v2
@@ -1670,9 +1716,9 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX10-NEXT: v_mul_lo_u32 v12, s19, v0
; GFX10-NEXT: v_mul_hi_u32 v13, s18, v0
; GFX10-NEXT: v_mul_hi_u32 v14, s19, v0
-; GFX10-NEXT: v_add_co_u32 v0, s0, v3, v8
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v4, s0, v9, v4
+; GFX10-NEXT: v_add_co_u32 v0, s0, v4, v8
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v3, s0, v9, v3
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v2, s0, v6, v2
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
@@ -1680,101 +1726,105 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v5
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX10-NEXT: v_add_co_u32 v4, s0, v4, v10
+; GFX10-NEXT: v_add_co_u32 v3, s0, v3, v10
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v7
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v4, v0
; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v13
-; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
-; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2
-; GFX10-NEXT: v_add_nc_u32_e32 v5, v8, v5
-; GFX10-NEXT: v_add_co_u32 v8, s0, v4, v0
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v7, v8, v5
+; GFX10-NEXT: v_add_co_u32 v8, s0, v3, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
; GFX10-NEXT: v_add_co_u32 v10, s0, v1, v2
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s4, v8, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s0
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s6, v10, 0
-; GFX10-NEXT: v_add_nc_u32_e32 v7, v9, v7
-; GFX10-NEXT: v_add3_u32 v9, v5, v4, v11
+; GFX10-NEXT: v_add_nc_u32_e32 v9, v9, v4
+; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX10-NEXT: v_add3_u32 v11, v7, v6, v11
+; GFX10-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX10-NEXT: v_mov_b32_e32 v13, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, v1
+; GFX10-NEXT: v_add3_u32 v9, v9, v12, v14
+; GFX10-NEXT: v_mov_b32_e32 v6, v3
; GFX10-NEXT: v_add_co_u32 v12, vcc_lo, v8, 1
-; GFX10-NEXT: v_mov_b32_e32 v11, 0
-; GFX10-NEXT: v_add3_u32 v7, v7, v6, v14
-; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s4, v9, v[1:2]
-; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v9, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s6, v7, v[3:4]
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s5, v8, v[4:5]
-; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v12, 1
-; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v13, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s16, v0
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v10, v[5:6]
-; GFX10-NEXT: v_sub_co_ci_u32_e64 v5, s0, s17, v3, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s4, v14
-; GFX10-NEXT: v_sub_nc_u32_e32 v1, s17, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, -1, s0
-; GFX10-NEXT: v_sub_co_u32 v15, s0, s18, v2
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s0, s4, v11, v[4:5]
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, s6, v9, v[6:7]
+; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v11, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, s5, v8, v[3:4]
+; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v12, 1
+; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v14, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v7, vcc_lo, s16, v0
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s7, v10, v[4:5]
+; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s0, s17, v5, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s4, v7
+; GFX10-NEXT: v_sub_nc_u32_e32 v1, s17, v5
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, s0
+; GFX10-NEXT: v_sub_co_u32 v16, s0, s18, v2
; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
-; GFX10-NEXT: v_sub_co_ci_u32_e64 v16, s1, s19, v0, s0
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v15
+; GFX10-NEXT: v_sub_co_ci_u32_e64 v17, s1, s19, v0, s0
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v16
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s19, v0
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v17, vcc_lo, v14, s4
+; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, v7, s4
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v1, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e64 s1, s5, v5
+; GFX10-NEXT: v_cmp_le_u32_e64 s1, s5, v15
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v23, s0, s7, v0, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s5, v18
; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s1
-; GFX10-NEXT: v_cmp_le_u32_e64 s1, s4, v17
+; GFX10-NEXT: v_cmp_le_u32_e64 s1, s4, v5
; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s1
; GFX10-NEXT: v_cmp_le_u32_e64 s1, s5, v18
; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, -1, s1
-; GFX10-NEXT: v_cmp_le_u32_e64 s1, s7, v16
+; GFX10-NEXT: v_cmp_le_u32_e64 s1, s7, v17
; GFX10-NEXT: v_cndmask_b32_e64 v0, v21, v20, s0
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s5, v5
+; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s5, v15
; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, -1, s1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX10-NEXT: v_sub_co_u32 v0, s0, v17, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v19, v4, s0
+; GFX10-NEXT: v_sub_co_u32 v0, s0, v5, s4
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v19, s0, 0, v1, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc_lo
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v6, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v6, s1, v15, s6
-; GFX10-NEXT: v_cndmask_b32_e32 v4, v17, v0, vcc_lo
-; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s2, 0, v23, s1
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v12, v3, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v4
+; GFX10-NEXT: v_cndmask_b32_e32 v3, v14, v6, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v5, s1, v16, s6
+; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s2, 0, v23, s1
; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v1, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v3, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v11, v3, s0
; GFX10-NEXT: v_cndmask_b32_e32 v3, v18, v19, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s7, v16
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v14, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v3, s0
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s7, v17
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v7, v4, s0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v22, v2, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v12
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v5
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v6
-; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc_lo
-; GFX10-NEXT: v_add_co_u32 v13, vcc_lo, v10, 1
-; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v7, vcc_lo
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s7, v12
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo
-; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v13, 1
-; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v14, vcc_lo
+; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v10, 1
+; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v9, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s7, v6
+; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
+; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v11, 1
+; GFX10-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, vcc_lo, s7, v23, s1
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8
-; GFX10-NEXT: v_sub_co_u32 v8, s1, v6, s6
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v7
+; GFX10-NEXT: v_sub_co_u32 v7, s1, v5, s6
; GFX10-NEXT: v_subrev_co_ci_u32_e64 v18, s1, 0, v18, s1
-; GFX10-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v13, v14, v17, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v8, v11, v8, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v11, v12, v14, vcc_lo
; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v2
-; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v8, v12, v18, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v9, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v13, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v6, v15, v6, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v7, v16, v8, s1
-; GFX10-NEXT: global_store_dwordx4 v11, v[0:3], s[12:13]
-; GFX10-NEXT: global_store_dwordx4 v11, v[4:7], s[14:15]
+; GFX10-NEXT: v_cndmask_b32_e32 v7, v5, v7, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v12, v6, v18, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v5, v15, v3, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v8, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, v11, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v6, v16, v7, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v7, v17, v12, s1
+; GFX10-NEXT: global_store_dwordx4 v13, v[0:3], s[12:13]
+; GFX10-NEXT: global_store_dwordx4 v13, v[4:7], s[14:15]
; GFX10-NEXT: s_endpgm
%div = udiv <2 x i64> %x, %y
store <2 x i64> %div, ptr addrspace(1) %out0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index 1fd139b06417f..4a19d7e99d621 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -633,10 +633,12 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v2, 8, v3 op_sel_hi:[0,1]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX11-TRUE16-NEXT: v_pk_lshlrev_b16 v1, 8, v6 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_sub_u16 v1, v2, v1 clamp
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2
@@ -878,6 +880,8 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s2
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_pk_sub_u16 v1, s0, s1 clamp
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11-TRUE16-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index 46b82d3a3d651..9c75967f2d624 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -226198,8 +226198,8 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add_f32_e32 v51, 0x40c00000, v51
; GFX11-TRUE16-NEXT: v_add3_u32 v37, v39, v48, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v49, 16, 1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v22.h
; GFX11-TRUE16-NEXT: v_dual_add_f32 v71, 0x40c00000, v71 :: v_dual_add_f32 v4, 0x40c00000, v4
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v37, v37, v38, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v38, v39, v49, 0x7fff
@@ -226207,22 +226207,21 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v50, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
; GFX11-TRUE16-NEXT: v_dual_add_f32 v49, 0x40c00000, v52 :: v_dual_lshlrev_b32 v52, 16, v25
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v34, 16, v21
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v38, v39 :: v_dual_and_b32 v5, 0xffff0000, v5
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v80, 0x40c00000, v80 :: v_dual_add_f32 v5, 0x40c00000, v5
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v24, v38, v39, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v38, v48, v50, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v50
; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v51, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v49, 16, 1
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v52, 0x40c00000, v52
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v80, 0x40c00000, v80 :: v_dual_add_f32 v5, 0x40c00000, v5
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v52, 0x40c00000, v52 :: v_dual_lshlrev_b32 v81, 16, v6
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v38, v38, v39, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v39, v48, v51, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v51
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v51, v51
; GFX11-TRUE16-NEXT: v_add_f32_e32 v51, 0x40c00000, v53
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 16, v6
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v81, 0x40c00000, v81 :: v_dual_add_f32 v6, 0x40c00000, v6
; GFX11-TRUE16-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v39, v48, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v39, v50, v49, 0x7fff
@@ -226230,86 +226229,82 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v52, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
; GFX11-TRUE16-NEXT: v_or_b32_e32 v49, 0x400000, v52
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v81, 0x40c00000, v81 :: v_dual_add_f32 v6, 0x40c00000, v6
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v84, 0x40c00000, v84 :: v_dual_lshlrev_b32 v85, 16, v10
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v39, v39, v48, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v48, v50, v52, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v52, v52
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v53, 16, v26
; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v51, 16, 1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v84, 0x40c00000, v84 :: v_dual_add_f32 v9, 0x40c00000, v9
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v48, v48, v49, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v53, 0x40c00000, v53 :: v_dual_add_f32 v54, 0x40c00000, v26
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v53, 0x40c00000, v53
; GFX11-TRUE16-NEXT: v_add3_u32 v49, v50, v51, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v50, 0x400000, v51
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v54, 0x40c00000, v26
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v27
; GFX11-TRUE16-NEXT: v_bfe_u32 v52, v53, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v51, v51
; GFX11-TRUE16-NEXT: v_or_b32_e32 v51, 0x400000, v53
; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 16, v10
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v49, v49, v50, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v50, v52, v53, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v52, v54, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v53, v53
; GFX11-TRUE16-NEXT: v_add_f32_e32 v55, 0x40c00000, v26
; GFX11-TRUE16-NEXT: v_add_f32_e32 v53, 0x40c00000, v27
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v85, 0x40c00000, v85 :: v_dual_cndmask_b32 v26, v50, v51
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v85, 0x40c00000, v85 :: v_dual_add_f32 v10, 0x40c00000, v10
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v26, v50, v51, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v50, v52, v54, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v51, 0x400000, v54
; GFX11-TRUE16-NEXT: v_bfe_u32 v52, v55, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v54, v54
; GFX11-TRUE16-NEXT: v_add_f32_e32 v54, 0x40c00000, v64
; GFX11-TRUE16-NEXT: v_and_b32_e32 v64, 0xffff0000, v28
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v10, 0x40c00000, v10 :: v_dual_add_f32 v11, 0x40c00000, v11
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v50, v51, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v50, v52, v55, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v51, 0x400000, v55
; GFX11-TRUE16-NEXT: v_bfe_u32 v52, v53, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v55, v55
; GFX11-TRUE16-NEXT: v_add_f32_e32 v55, 0x40c00000, v64
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v27.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v28, v50, v51 :: v_dual_and_b32 v13, 0xffff0000, v13
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v13, 0x40c00000, v13 :: v_dual_cndmask_b32 v28, v50, v51
; GFX11-TRUE16-NEXT: v_add3_u32 v50, v52, v53, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v51, 0x400000, v53
; GFX11-TRUE16-NEXT: v_bfe_u32 v52, v54, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v53, v53
; GFX11-TRUE16-NEXT: v_bfe_u32 v53, v55, 16, 1
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v33, 16, v26
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v37.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v23
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v50, v50, v51, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v51, v52, v54, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v52, 0x400000, v54
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v54, v54
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 16, v29
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v51, v52, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v29, v51, v52 :: v_dual_lshlrev_b32 v64, 16, v29
; GFX11-TRUE16-NEXT: v_add3_u32 v51, v53, v55, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v52, 0x400000, v55
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v55, v55
; GFX11-TRUE16-NEXT: v_dual_add_f32 v54, 0x40c00000, v65 :: v_dual_lshlrev_b32 v65, 16, v30
; GFX11-TRUE16-NEXT: v_add_f32_e32 v64, 0x40c00000, v64
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v50.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v51, v51, v52, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v55, v54, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v65, 0x40c00000, v65
; GFX11-TRUE16-NEXT: v_bfe_u32 v53, v64, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v64, v64
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v28
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_add3_u32 v52, v53, v64, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v53, 0x400000, v64
; GFX11-TRUE16-NEXT: v_add_f32_e32 v64, 0x40c00000, v66
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 16, v31
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v30, v52, v53 :: v_dual_and_b32 v31, 0xffff0000, v31
; GFX11-TRUE16-NEXT: v_add3_u32 v52, v55, v54, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v53, 0x400000, v54
@@ -226349,21 +226344,18 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add_f32_e32 v67, 0x40c00000, v69
; GFX11-TRUE16-NEXT: v_bfe_u32 v65, v66, 16, 1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 16, v2
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v31.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v55, v64, vcc_lo
; GFX11-TRUE16-NEXT: v_or_b32_e32 v64, 0x400000, v66
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v55, v65, v66, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v66, v66
; GFX11-TRUE16-NEXT: v_bfe_u32 v66, v67, 16, 1
; GFX11-TRUE16-NEXT: v_add_f32_e32 v69, 0x40c00000, v69
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v55, v55, v64, vcc_lo
; GFX11-TRUE16-NEXT: v_add_f32_e32 v68, 0x40c00000, v68
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v55.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_bfe_u32 v65, v68, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v68, v68
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v1
; GFX11-TRUE16-NEXT: v_add3_u32 v64, v65, v68, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v65, 0x400000, v68
; GFX11-TRUE16-NEXT: v_add_f32_e32 v68, 0x40c00000, v70
@@ -226395,12 +226387,10 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v70, v70
; GFX11-TRUE16-NEXT: v_bfe_u32 v70, v71, 16, 1
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v67, v67, v68, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v68, v69, v3, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v69, 0x400000, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v67
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v68, v69, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v68, v70, v71, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v69, 0x400000, v71
@@ -226412,28 +226402,26 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add3_u32 v69, v70, v4, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v70, 0x400000, v4
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v68
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v4, v69, v70, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v69, v71, v80, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v70, 0x400000, v80
; GFX11-TRUE16-NEXT: v_bfe_u32 v71, v5, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v80, v80
; GFX11-TRUE16-NEXT: v_bfe_u32 v80, v81, 16, 1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v69, v69, v70, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v70, v71, v5, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v71, 0x400000, v5
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v69
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v70, v71, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v70, v80, v81, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v71, 0x400000, v81
; GFX11-TRUE16-NEXT: v_bfe_u32 v80, v6, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v81, v81
; GFX11-TRUE16-NEXT: v_bfe_u32 v81, v82, 16, 1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v5.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v70, v70, v71, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v71, v80, v6, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v80, 0x400000, v6
@@ -226481,14 +226469,12 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_bfe_u32 v84, v10, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v85, v85
; GFX11-TRUE16-NEXT: v_bfe_u32 v85, v86, 16, 1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v9.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v81
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v82, v82, v83, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v83, v84, v10, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v84, 0x400000, v10
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v7, 16, v9
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v10, v83, v84, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v83, v85, v86, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v84, 0x400000, v86
@@ -226496,72 +226482,50 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v86, v86
; GFX11-TRUE16-NEXT: v_add_f32_e32 v86, 0x40c00000, v96
; GFX11-TRUE16-NEXT: v_or_b32_e32 v96, 0x400000, v11
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v10.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_add3_u32 v85, v85, v11, 0x7fff
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v83, v83, v84, vcc_lo
; GFX11-TRUE16-NEXT: v_bfe_u32 v84, v87, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-TRUE16-NEXT: v_bfe_u32 v99, v86, 16, 1
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v82
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v84, v84, v87, 0x7fff
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v11, v85, v96, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v87, v87
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_add3_u32 v87, v99, v86, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v96, 0x400000, v86
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v6, 16, v10
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v11.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v84, v84, v97, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v86, v86
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v83
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v86, v87, v96, vcc_lo
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v96, 16, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v5, 16, v11
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v70
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v96, 0x40c00000, v96 :: v_dual_add_f32 v15, 0x40c00000, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v5, 16, v6
; GFX11-TRUE16-NEXT: v_bfe_u32 v101, v96, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v102, v15, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v113, 0x400000, v15
; GFX11-TRUE16-NEXT: v_or_b32_e32 v114, 0x400000, v96
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v17, 16, v69
; GFX11-TRUE16-NEXT: v_add3_u32 v101, v101, v96, 0x7fff
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add3_u32 v102, v102, v15, 0x7fff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v66.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v0
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v27, 16, v55
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v51.h
; GFX11-TRUE16-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v29
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v31, 16, v66
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_bfe_u32 v98, v12, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v97, 0x400000, v12
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v27, 16, v51
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v32, 16, v50
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v38.h
; GFX11-TRUE16-NEXT: v_add3_u32 v85, v98, v12, 0x7fff
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 16, v14
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v24
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v87, 0x40c00000, v98
; GFX11-TRUE16-NEXT: v_bfe_u32 v98, v13, 16, 1
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v32, 16, v38
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v34.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v20
; GFX11-TRUE16-NEXT: v_bfe_u32 v99, v87, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v98, v98, v13, 0x7fff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v103, 0x400000, v87
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v18
; GFX11-TRUE16-NEXT: v_add3_u32 v99, v99, v87, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; GFX11-TRUE16-NEXT: v_bfe_u32 v100, v14, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v112, 0x400000, v14
@@ -226570,73 +226534,140 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add3_u32 v100, v100, v14, 0x7fff
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v14, v100, v112, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v14.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v15, v102, v113, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v96, v96
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v100.l, v15.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v96, v101, v114, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v87, v87
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v96
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v87, v99, v103, vcc_lo
; GFX11-TRUE16-NEXT: v_or_b32_e32 v99, 0x400000, v13
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v3, 16, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v96.l, v14.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v87
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v100, 16, v15
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v13, v98, v99, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v4, 16, v14
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.h
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v96, 16, v14
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v12, v85, v97, vcc_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v85.l, v13.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v86
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v12.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.l, v12.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v84
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v3, 16, v13
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v84.l, v11.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v83
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v85, 16, v13
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v83.l, v10.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v82
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v86, 16, v12
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.l, v9.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v81
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v84, 16, v11
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v81.l, v8.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v80
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v4, 16, v12
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v83, 16, v10
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.l, v7.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v71
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v3, 16, v8
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v3.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v4, 16, v7
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v22, 16, v68
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v64.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v82, 16, v9
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.l, v6.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v70
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v81, 16, v8
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.l, v5.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v69
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v80, 16, v7
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v4.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v68
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v71, 16, v6
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v68.l, v3.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v67
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v70, 16, v5
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v69, 16, v4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v67.l, v66.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v55.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v1
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v68, 16, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v66.l, v64.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v2
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v17, 16, v65
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v54.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v3, 16, v67
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v22, 16, v64
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v52.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.l, v31.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v0
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v69, 16, v55
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v67, 16, v65
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.l, v54.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v66, 16, v64
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.l, v52.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v30
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v17, 16, v53
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v49.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v22, 16, v52
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v39.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.l, v50.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v28
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.l, v51.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v29
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v54, 16, v52
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v66.l, v27.h
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v65, 16, v50
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v55, 16, v53
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v64, 16, v51
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v49.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v37.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v23
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v39.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v25
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v17, 16, v48
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v36.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v19
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v32, 16, v34
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v22, 16, v39
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v33, 16, v37
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v33.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v32.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v17.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v17, 16, v35
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v33, 16, v36
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v37, 16, v38
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v39, 16, v16
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v38.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v24
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v50, 16, v48
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.l, v22.h
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v52, 16, v37
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v49, 16, v39
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v51, 16, v38
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v36.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v70, 16, v31
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v34.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v20
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v33.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v19
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v32.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v18
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v17.h
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v66, 16, v26
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v53, 16, v21
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v37, 16, v35
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v36, 16, v34
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v38, 16, v33
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v39, 16, v32
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v48, 16, v16
; GFX11-TRUE16-NEXT: .LBB104_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
index 9b28fd9e7b6fd..9533a8eb8a9bb 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
@@ -18251,7 +18251,6 @@ define <8 x i16> @bitcast_v8bf16_to_v8i16(<8 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add3_u32 v12, v12, v7, 0x7fff
; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; GFX11-TRUE16-NEXT: v_add3_u32 v8, v11, v1, 0x7fff
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v9, v10, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_add_f32_e32 v7, 0x40c00000, v4
@@ -18266,8 +18265,7 @@ define <8 x i16> @bitcast_v8bf16_to_v8i16(<8 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_add3_u32 v9, v9, v7, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v13, v4, 16, 1
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v7, v9, v11, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v9, v12, v2, 0x7fff
; GFX11-TRUE16-NEXT: v_add3_u32 v11, v13, v4, 0x7fff
@@ -18281,24 +18279,29 @@ define <8 x i16> @bitcast_v8bf16_to_v8i16(<8 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-TRUE16-NEXT: v_add3_u32 v13, v14, v3, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, 0x400000, v3
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v13, v14, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v9, v15, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v8, v10, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v3.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v7
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v0, 16, v3
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v1, 16, v2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v7, 16, v5
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v4, 16, v6
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v1.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v6
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v8, 16, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v5
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v4, 16, v2
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v7, 16, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v6, 16, v0
; GFX11-TRUE16-NEXT: .LBB94_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
index 52e125d0d658f..b0664de7e6370 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
@@ -28849,21 +28849,20 @@ define <16 x i16> @bitcast_v16bf16_to_v16i16(<16 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v1, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v12, 0x40c00000, v12 :: v_dual_add_f32 v3, 0x40c00000, v3
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add3_u32 v11, v11, v1, 0x7fff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v13, v0, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, 0x400000, v0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add3_u32 v13, v13, v0, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v13, v15, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, 0x400000, v1
@@ -28873,8 +28872,7 @@ define <16 x i16> @bitcast_v16bf16_to_v16i16(<16 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v14, v10, 16, 1
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v11, v13, vcc_lo
; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, 0x400000, v10
@@ -28885,13 +28883,12 @@ define <16 x i16> @bitcast_v16bf16_to_v16i16(<16 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, 0x400000, v4
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v10, v11, v13, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v11, v14, v2, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, 0x400000, v2
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_add3_u32 v14, v15, v12, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, 0x400000, v12
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v11, v13, vcc_lo
; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v3, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
@@ -28949,33 +28946,45 @@ define <16 x i16> @bitcast_v16bf16_to_v16i16(<16 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v7, v23, v24, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v19, v25, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v18
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v16, v20, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v7.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v18
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v4.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v6.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v14
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v0, 16, v7
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v5.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v11
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v11, v15, v17, vcc_lo
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v1, 16, v6
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v12
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v2, 16, v5
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v16, 16, v7
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v18, 16, v6
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v14, 16, v5
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v11
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v1.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v0.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v1, 16, v3
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v4
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v2, 16, v10
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v11, 16, v9
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v12, 16, v8
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v3.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v12
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v2.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v10
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v13, 16, v4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v1.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v9
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v11, 16, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v8
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v12, 16, v2
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v10, 16, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v9, 16, v0
; GFX11-TRUE16-NEXT: .LBB94_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll
index 632b03ca51b81..683e3dc15f133 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll
@@ -5455,12 +5455,13 @@ define <2 x i16> @bitcast_v2bf16_to_v2i16(<2 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-TRUE16-NEXT: .LBB50_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll
index fd190b23dd8ca..4c137069aae8c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll
@@ -824,17 +824,18 @@ define <3 x i16> @bitcast_v3bf16_to_v3i16(<3 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_add3_u32 v5, v5, v1, 0x7fff
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v7, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, 0x7fc0, 16, v2
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v0
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, 0x7fc0, 16, v1
; GFX11-TRUE16-NEXT: .LBB4_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
index ede44e738fe00..ac2b77a24b09d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
@@ -60959,49 +60959,42 @@ define <32 x i16> @bitcast_v32bf16_to_v32i16(<32 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22
; GFX11-TRUE16-NEXT: v_add3_u32 v19, v20, v22, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, 0x400000, v22
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v16.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc_lo
; GFX11-TRUE16-NEXT: v_add3_u32 v20, v21, v3, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, 0x400000, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v20, v21, vcc_lo
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v3, v20, v21 :: v_dual_and_b32 v4, 0xffff0000, v4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v23, 0x40c00000, v23 :: v_dual_add_f32 v4, 0x40c00000, v4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v22, v23, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, 0x400000, v23
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v20, v22, v23, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v22, v4, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v21, v22, v4, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, 0x400000, v4
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v4, v21, v22 :: v_dual_and_b32 v5, 0xffff0000, v5
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v24, 0x40c00000, v24 :: v_dual_add_f32 v5, 0x40c00000, v5
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v23, v24, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, 0x400000, v24
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v21, v23, v24, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v23, v5, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v22, v23, v5, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, 0x400000, v5
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v22, v23, vcc_lo
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v5, v22, v23 :: v_dual_and_b32 v6, 0xffff0000, v6
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v25, 0x40c00000, v25 :: v_dual_add_f32 v6, 0x40c00000, v6
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v5.h
; GFX11-TRUE16-NEXT: v_bfe_u32 v24, v25, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, 0x400000, v25
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
@@ -61070,45 +61063,38 @@ define <32 x i16> @bitcast_v32bf16_to_v32i16(<32 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, 0x400000, v10
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v10, v27, v28 :: v_dual_and_b32 v11, 0xffff0000, v11
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v30, 0x40c00000, v30 :: v_dual_add_f32 v11, 0x40c00000, v11
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v10.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v29, v30, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, 0x400000, v30
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v26
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v27, v29, v30, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v29, v11, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v6, 16, v10
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v27, v28, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add3_u32 v28, v29, v11, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, 0x400000, v11
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v11, v28, v29 :: v_dual_and_b32 v12, 0xffff0000, v12
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_add_f32 v31, 0x40c00000, v31 :: v_dual_add_f32 v12, 0x40c00000, v12
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v11.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfe_u32 v30, v31, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v28, v12, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31
; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v12
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v27
; GFX11-TRUE16-NEXT: v_add3_u32 v29, v30, v31, 0x7fff
; GFX11-TRUE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v32
; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, 0x400000, v31
; GFX11-TRUE16-NEXT: v_add3_u32 v28, v28, v12, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v31, v13, 16, 1
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v5, 16, v11
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v30, 16, 1
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v29, v32, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v31, v31, v13, 0x7fff
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v22
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v12, v28, v33, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v14
@@ -61119,74 +61105,100 @@ define <32 x i16> @bitcast_v32bf16_to_v32i16(<32 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v13, v31, v35 :: v_dual_add_f32 v32, 0x40c00000, v32
; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v30
; GFX11-TRUE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v34 :: v_dual_add_f32 v15, 0x40c00000, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v32, 16, 1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v12.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v31, v15, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v15
; GFX11-TRUE16-NEXT: v_add3_u32 v35, v36, v32, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v29
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add3_u32 v31, v31, v15, 0x7fff
; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v5, 16, v6
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v16, 16, v21
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v4, 16, v12
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.h
; GFX11-TRUE16-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v23
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v18.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v1.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v14, 16, 1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v14
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v4, 16, v7
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v21, 16, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add3_u32 v36, v37, v14, 0x7fff
; GFX11-TRUE16-NEXT: v_add3_u32 v37, v38, v34, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v34
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v34, v37, v38, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v34.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v15, v31, v39, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, 0x400000, v32
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v15
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v14, v36, v48, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v1, 16, v15
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v14.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v34.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v31, v35, v31, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v36, 16, v15
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v14.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v31
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v28, v28, v33, vcc_lo
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v1, 16, v14
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v13.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v30, 16, v14
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v28
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v12.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v29
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v11.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v27
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v31, 16, v13
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v10.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v26
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v28, 16, v12
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v9.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v25
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v3, 16, v13
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v29, 16, v11
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v8.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v24
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v1, 16, v9
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr24
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v27, 16, v10
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v7.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v23
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v26, 16, v9
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v6.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v22
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr22
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v25, 16, v8
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v5.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v21
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v24, 16, v7
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v4.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v20
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v2
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v3, 16, v8
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v3.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v16, 16, v17
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v1, 16, v4
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v18, 16, v20
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v3, 16, v19
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v23, 16, v6
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v3.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v19
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v22, 16, v5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v18.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr22
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v21, 16, v4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v16.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v1.h
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v3, v20, 16, v3
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v19, 16, v17
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v18, 16, v16
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v22, 16, v0
; GFX11-TRUE16-NEXT: .LBB94_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll
index ab1f8606cffd7..f9e0157e2dc4e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll
@@ -12846,22 +12846,25 @@ define <4 x i16> @bitcast_v4bf16_to_v4i16(<4 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v0, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v10, v11, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v0, 16, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v3
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v4, 16, v1
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v3
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX11-TRUE16-NEXT: .LBB94_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll
index 9f5c9c4c509ed..d6e89167e68e6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll
@@ -8437,7 +8437,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v37, v5
; SI-NEXT: v_cvt_f16_f32_e32 v49, v4
; SI-NEXT: v_cvt_f16_f32_e32 v35, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v50, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v51, v6
; SI-NEXT: v_cvt_f16_f32_e32 v32, v9
; SI-NEXT: v_cvt_f16_f32_e32 v59, v8
; SI-NEXT: v_cvt_f16_f32_e32 v58, v11
@@ -8460,8 +8460,8 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v24, v26
; SI-NEXT: v_cvt_f16_f32_e32 v22, v29
; SI-NEXT: v_cvt_f16_f32_e32 v23, v28
-; SI-NEXT: v_cvt_f16_f32_e32 v51, s17
-; SI-NEXT: v_cvt_f16_f32_e32 v33, s16
+; SI-NEXT: v_cvt_f16_f32_e32 v33, s17
+; SI-NEXT: v_cvt_f16_f32_e32 v50, s16
; SI-NEXT: v_cvt_f16_f32_e32 v1, s19
; SI-NEXT: v_cvt_f16_f32_e32 v34, s18
; SI-NEXT: v_cvt_f16_f32_e32 v2, s21
@@ -8484,7 +8484,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: ; %bb.1: ; %cmp.false
; SI-NEXT: s_waitcnt expcnt(1)
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v51
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v33
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v2, v3, v2
@@ -8507,7 +8507,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_lshlrev_b32_e32 v19, 16, v52
; SI-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v22
-; SI-NEXT: v_or_b32_e32 v0, v33, v0
+; SI-NEXT: v_or_b32_e32 v0, v50, v0
; SI-NEXT: v_or_b32_e32 v1, v34, v1
; SI-NEXT: v_or_b32_e32 v3, v62, v3
; SI-NEXT: v_or_b32_e32 v4, v36, v4
@@ -8516,7 +8516,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_or_b32_e32 v7, v48, v7
; SI-NEXT: v_or_b32_e32 v8, v38, v8
; SI-NEXT: v_or_b32_e32 v9, v49, v9
-; SI-NEXT: v_or_b32_e32 v10, v50, v10
+; SI-NEXT: v_or_b32_e32 v10, v51, v10
; SI-NEXT: v_or_b32_e32 v11, v59, v11
; SI-NEXT: v_or_b32_e32 v12, v57, v12
; SI-NEXT: v_or_b32_e32 v13, v47, v13
@@ -8531,8 +8531,8 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: s_cbranch_execnz .LBB19_3
; SI-NEXT: .LBB19_2: ; %cmp.true
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; SI-NEXT: v_cvt_f32_f16_e32 v0, v51
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v50
; SI-NEXT: v_cvt_f32_f16_e32 v3, v34
; SI-NEXT: v_cvt_f32_f16_e32 v4, v62
; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0
@@ -8561,7 +8561,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f32_f16_e32 v9, v38
; SI-NEXT: v_cvt_f32_f16_e32 v10, v37
; SI-NEXT: v_cvt_f32_f16_e32 v11, v49
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v50
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v51
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
; SI-NEXT: v_add_f32_e32 v10, 0x38000000, v10
; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
@@ -8739,7 +8739,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; SI-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; SI-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; SI-NEXT: s_waitcnt expcnt(0)
@@ -8757,14 +8757,14 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v56, v43
; SI-NEXT: v_mov_b32_e32 v43, v54
; SI-NEXT: v_mov_b32_e32 v54, v24
+; SI-NEXT: v_mov_b32_e32 v32, v50
; SI-NEXT: v_mov_b32_e32 v50, v34
; SI-NEXT: v_mov_b32_e32 v34, v62
; SI-NEXT: v_mov_b32_e32 v62, v57
; SI-NEXT: v_mov_b32_e32 v57, v44
; SI-NEXT: v_mov_b32_e32 v44, v55
; SI-NEXT: v_mov_b32_e32 v55, v25
-; SI-NEXT: v_mov_b32_e32 v32, v33
-; SI-NEXT: v_mov_b32_e32 v33, v63
+; SI-NEXT: v_mov_b32_e32 v51, v63
; SI-NEXT: v_mov_b32_e32 v63, v58
; SI-NEXT: v_mov_b32_e32 v58, v45
; SI-NEXT: v_mov_b32_e32 v45, v40
@@ -8784,14 +8784,14 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v40, v45
; SI-NEXT: v_mov_b32_e32 v45, v58
; SI-NEXT: v_mov_b32_e32 v58, v63
-; SI-NEXT: v_mov_b32_e32 v63, v33
-; SI-NEXT: v_mov_b32_e32 v33, v32
+; SI-NEXT: v_mov_b32_e32 v63, v51
; SI-NEXT: v_mov_b32_e32 v25, v55
; SI-NEXT: v_mov_b32_e32 v55, v44
; SI-NEXT: v_mov_b32_e32 v44, v57
; SI-NEXT: v_mov_b32_e32 v57, v62
; SI-NEXT: v_mov_b32_e32 v62, v34
; SI-NEXT: v_mov_b32_e32 v34, v50
+; SI-NEXT: v_mov_b32_e32 v50, v32
; SI-NEXT: v_mov_b32_e32 v24, v54
; SI-NEXT: v_mov_b32_e32 v54, v43
; SI-NEXT: v_mov_b32_e32 v43, v56
@@ -8808,7 +8808,7 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v46, v59
; SI-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; SI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; SI-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
@@ -16907,7 +16907,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_cvt_f16_f32_e32 v37, v5
; SI-NEXT: v_cvt_f16_f32_e32 v49, v4
; SI-NEXT: v_cvt_f16_f32_e32 v35, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v50, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v51, v6
; SI-NEXT: v_cvt_f16_f32_e32 v32, v9
; SI-NEXT: v_cvt_f16_f32_e32 v59, v8
; SI-NEXT: v_cvt_f16_f32_e32 v58, v11
@@ -16930,8 +16930,8 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_cvt_f16_f32_e32 v24, v26
; SI-NEXT: v_cvt_f16_f32_e32 v22, v29
; SI-NEXT: v_cvt_f16_f32_e32 v23, v28
-; SI-NEXT: v_cvt_f16_f32_e32 v51, s17
-; SI-NEXT: v_cvt_f16_f32_e32 v33, s16
+; SI-NEXT: v_cvt_f16_f32_e32 v33, s17
+; SI-NEXT: v_cvt_f16_f32_e32 v50, s16
; SI-NEXT: v_cvt_f16_f32_e32 v1, s19
; SI-NEXT: v_cvt_f16_f32_e32 v34, s18
; SI-NEXT: v_cvt_f16_f32_e32 v2, s21
@@ -16954,7 +16954,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: ; %bb.1: ; %cmp.false
; SI-NEXT: s_waitcnt expcnt(1)
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v51
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v33
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v2, v3, v2
@@ -16977,7 +16977,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_lshlrev_b32_e32 v19, 16, v52
; SI-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v22
-; SI-NEXT: v_or_b32_e32 v0, v33, v0
+; SI-NEXT: v_or_b32_e32 v0, v50, v0
; SI-NEXT: v_or_b32_e32 v1, v34, v1
; SI-NEXT: v_or_b32_e32 v3, v62, v3
; SI-NEXT: v_or_b32_e32 v4, v36, v4
@@ -16986,7 +16986,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_or_b32_e32 v7, v48, v7
; SI-NEXT: v_or_b32_e32 v8, v38, v8
; SI-NEXT: v_or_b32_e32 v9, v49, v9
-; SI-NEXT: v_or_b32_e32 v10, v50, v10
+; SI-NEXT: v_or_b32_e32 v10, v51, v10
; SI-NEXT: v_or_b32_e32 v11, v59, v11
; SI-NEXT: v_or_b32_e32 v12, v57, v12
; SI-NEXT: v_or_b32_e32 v13, v47, v13
@@ -17001,8 +17001,8 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: s_cbranch_execnz .LBB35_3
; SI-NEXT: .LBB35_2: ; %cmp.true
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; SI-NEXT: v_cvt_f32_f16_e32 v0, v51
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v50
; SI-NEXT: v_cvt_f32_f16_e32 v3, v34
; SI-NEXT: v_cvt_f32_f16_e32 v4, v62
; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0
@@ -17031,7 +17031,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_cvt_f32_f16_e32 v9, v38
; SI-NEXT: v_cvt_f32_f16_e32 v10, v37
; SI-NEXT: v_cvt_f32_f16_e32 v11, v49
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v50
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v51
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
; SI-NEXT: v_add_f32_e32 v10, 0x38000000, v10
; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
@@ -17209,7 +17209,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; SI-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; SI-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; SI-NEXT: s_waitcnt expcnt(0)
@@ -17227,14 +17227,14 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_mov_b32_e32 v56, v43
; SI-NEXT: v_mov_b32_e32 v43, v54
; SI-NEXT: v_mov_b32_e32 v54, v24
+; SI-NEXT: v_mov_b32_e32 v32, v50
; SI-NEXT: v_mov_b32_e32 v50, v34
; SI-NEXT: v_mov_b32_e32 v34, v62
; SI-NEXT: v_mov_b32_e32 v62, v57
; SI-NEXT: v_mov_b32_e32 v57, v44
; SI-NEXT: v_mov_b32_e32 v44, v55
; SI-NEXT: v_mov_b32_e32 v55, v25
-; SI-NEXT: v_mov_b32_e32 v32, v33
-; SI-NEXT: v_mov_b32_e32 v33, v63
+; SI-NEXT: v_mov_b32_e32 v51, v63
; SI-NEXT: v_mov_b32_e32 v63, v58
; SI-NEXT: v_mov_b32_e32 v58, v45
; SI-NEXT: v_mov_b32_e32 v45, v40
@@ -17254,14 +17254,14 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_mov_b32_e32 v40, v45
; SI-NEXT: v_mov_b32_e32 v45, v58
; SI-NEXT: v_mov_b32_e32 v58, v63
-; SI-NEXT: v_mov_b32_e32 v63, v33
-; SI-NEXT: v_mov_b32_e32 v33, v32
+; SI-NEXT: v_mov_b32_e32 v63, v51
; SI-NEXT: v_mov_b32_e32 v25, v55
; SI-NEXT: v_mov_b32_e32 v55, v44
; SI-NEXT: v_mov_b32_e32 v44, v57
; SI-NEXT: v_mov_b32_e32 v57, v62
; SI-NEXT: v_mov_b32_e32 v62, v34
; SI-NEXT: v_mov_b32_e32 v34, v50
+; SI-NEXT: v_mov_b32_e32 v50, v32
; SI-NEXT: v_mov_b32_e32 v24, v54
; SI-NEXT: v_mov_b32_e32 v54, v43
; SI-NEXT: v_mov_b32_e32 v43, v56
@@ -17278,7 +17278,7 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a,
; SI-NEXT: v_mov_b32_e32 v46, v59
; SI-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; SI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; SI-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
@@ -24693,7 +24693,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v37, v5
; SI-NEXT: v_cvt_f16_f32_e32 v49, v4
; SI-NEXT: v_cvt_f16_f32_e32 v35, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v50, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v51, v6
; SI-NEXT: v_cvt_f16_f32_e32 v32, v9
; SI-NEXT: v_cvt_f16_f32_e32 v59, v8
; SI-NEXT: v_cvt_f16_f32_e32 v58, v11
@@ -24716,8 +24716,8 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v24, v26
; SI-NEXT: v_cvt_f16_f32_e32 v22, v29
; SI-NEXT: v_cvt_f16_f32_e32 v23, v28
-; SI-NEXT: v_cvt_f16_f32_e32 v51, s17
-; SI-NEXT: v_cvt_f16_f32_e32 v33, s16
+; SI-NEXT: v_cvt_f16_f32_e32 v33, s17
+; SI-NEXT: v_cvt_f16_f32_e32 v50, s16
; SI-NEXT: v_cvt_f16_f32_e32 v1, s19
; SI-NEXT: v_cvt_f16_f32_e32 v34, s18
; SI-NEXT: v_cvt_f16_f32_e32 v2, s21
@@ -24740,7 +24740,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: ; %bb.1: ; %cmp.false
; SI-NEXT: s_waitcnt expcnt(1)
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v51
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v33
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v2, v3, v2
@@ -24763,7 +24763,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_lshlrev_b32_e32 v19, 16, v52
; SI-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v22
-; SI-NEXT: v_or_b32_e32 v0, v33, v0
+; SI-NEXT: v_or_b32_e32 v0, v50, v0
; SI-NEXT: v_or_b32_e32 v1, v34, v1
; SI-NEXT: v_or_b32_e32 v3, v62, v3
; SI-NEXT: v_or_b32_e32 v4, v36, v4
@@ -24772,7 +24772,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_or_b32_e32 v7, v48, v7
; SI-NEXT: v_or_b32_e32 v8, v38, v8
; SI-NEXT: v_or_b32_e32 v9, v49, v9
-; SI-NEXT: v_or_b32_e32 v10, v50, v10
+; SI-NEXT: v_or_b32_e32 v10, v51, v10
; SI-NEXT: v_or_b32_e32 v11, v59, v11
; SI-NEXT: v_or_b32_e32 v12, v57, v12
; SI-NEXT: v_or_b32_e32 v13, v47, v13
@@ -24787,8 +24787,8 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: s_cbranch_execnz .LBB47_3
; SI-NEXT: .LBB47_2: ; %cmp.true
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; SI-NEXT: v_cvt_f32_f16_e32 v0, v51
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v50
; SI-NEXT: v_cvt_f32_f16_e32 v3, v34
; SI-NEXT: v_cvt_f32_f16_e32 v4, v62
; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0
@@ -24817,7 +24817,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_cvt_f32_f16_e32 v9, v38
; SI-NEXT: v_cvt_f32_f16_e32 v10, v37
; SI-NEXT: v_cvt_f32_f16_e32 v11, v49
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v50
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v51
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
; SI-NEXT: v_add_f32_e32 v10, 0x38000000, v10
; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
@@ -24995,7 +24995,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; SI-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; SI-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; SI-NEXT: s_waitcnt expcnt(0)
@@ -25013,14 +25013,14 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v56, v43
; SI-NEXT: v_mov_b32_e32 v43, v54
; SI-NEXT: v_mov_b32_e32 v54, v24
+; SI-NEXT: v_mov_b32_e32 v32, v50
; SI-NEXT: v_mov_b32_e32 v50, v34
; SI-NEXT: v_mov_b32_e32 v34, v62
; SI-NEXT: v_mov_b32_e32 v62, v57
; SI-NEXT: v_mov_b32_e32 v57, v44
; SI-NEXT: v_mov_b32_e32 v44, v55
; SI-NEXT: v_mov_b32_e32 v55, v25
-; SI-NEXT: v_mov_b32_e32 v32, v33
-; SI-NEXT: v_mov_b32_e32 v33, v63
+; SI-NEXT: v_mov_b32_e32 v51, v63
; SI-NEXT: v_mov_b32_e32 v63, v58
; SI-NEXT: v_mov_b32_e32 v58, v45
; SI-NEXT: v_mov_b32_e32 v45, v40
@@ -25040,14 +25040,14 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v40, v45
; SI-NEXT: v_mov_b32_e32 v45, v58
; SI-NEXT: v_mov_b32_e32 v58, v63
-; SI-NEXT: v_mov_b32_e32 v63, v33
-; SI-NEXT: v_mov_b32_e32 v33, v32
+; SI-NEXT: v_mov_b32_e32 v63, v51
; SI-NEXT: v_mov_b32_e32 v25, v55
; SI-NEXT: v_mov_b32_e32 v55, v44
; SI-NEXT: v_mov_b32_e32 v44, v57
; SI-NEXT: v_mov_b32_e32 v57, v62
; SI-NEXT: v_mov_b32_e32 v62, v34
; SI-NEXT: v_mov_b32_e32 v34, v50
+; SI-NEXT: v_mov_b32_e32 v50, v32
; SI-NEXT: v_mov_b32_e32 v24, v54
; SI-NEXT: v_mov_b32_e32 v54, v43
; SI-NEXT: v_mov_b32_e32 v43, v56
@@ -25064,7 +25064,7 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i
; SI-NEXT: v_mov_b32_e32 v46, v59
; SI-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; SI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; SI-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
@@ -31528,7 +31528,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_cvt_f16_f32_e32 v37, v5
; SI-NEXT: v_cvt_f16_f32_e32 v49, v4
; SI-NEXT: v_cvt_f16_f32_e32 v35, v7
-; SI-NEXT: v_cvt_f16_f32_e32 v50, v6
+; SI-NEXT: v_cvt_f16_f32_e32 v51, v6
; SI-NEXT: v_cvt_f16_f32_e32 v32, v9
; SI-NEXT: v_cvt_f16_f32_e32 v59, v8
; SI-NEXT: v_cvt_f16_f32_e32 v58, v11
@@ -31551,8 +31551,8 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_cvt_f16_f32_e32 v24, v26
; SI-NEXT: v_cvt_f16_f32_e32 v22, v29
; SI-NEXT: v_cvt_f16_f32_e32 v23, v28
-; SI-NEXT: v_cvt_f16_f32_e32 v51, s17
-; SI-NEXT: v_cvt_f16_f32_e32 v33, s16
+; SI-NEXT: v_cvt_f16_f32_e32 v33, s17
+; SI-NEXT: v_cvt_f16_f32_e32 v50, s16
; SI-NEXT: v_cvt_f16_f32_e32 v1, s19
; SI-NEXT: v_cvt_f16_f32_e32 v34, s18
; SI-NEXT: v_cvt_f16_f32_e32 v2, s21
@@ -31575,7 +31575,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: ; %bb.1: ; %cmp.false
; SI-NEXT: s_waitcnt expcnt(1)
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v51
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v33
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v2, v3, v2
@@ -31598,7 +31598,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_lshlrev_b32_e32 v19, 16, v52
; SI-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; SI-NEXT: v_lshlrev_b32_e32 v21, 16, v22
-; SI-NEXT: v_or_b32_e32 v0, v33, v0
+; SI-NEXT: v_or_b32_e32 v0, v50, v0
; SI-NEXT: v_or_b32_e32 v1, v34, v1
; SI-NEXT: v_or_b32_e32 v3, v62, v3
; SI-NEXT: v_or_b32_e32 v4, v36, v4
@@ -31607,7 +31607,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_or_b32_e32 v7, v48, v7
; SI-NEXT: v_or_b32_e32 v8, v38, v8
; SI-NEXT: v_or_b32_e32 v9, v49, v9
-; SI-NEXT: v_or_b32_e32 v10, v50, v10
+; SI-NEXT: v_or_b32_e32 v10, v51, v10
; SI-NEXT: v_or_b32_e32 v11, v59, v11
; SI-NEXT: v_or_b32_e32 v12, v57, v12
; SI-NEXT: v_or_b32_e32 v13, v47, v13
@@ -31622,8 +31622,8 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: s_cbranch_execnz .LBB55_3
; SI-NEXT: .LBB55_2: ; %cmp.true
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; SI-NEXT: v_cvt_f32_f16_e32 v0, v51
-; SI-NEXT: v_cvt_f32_f16_e32 v1, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v33
+; SI-NEXT: v_cvt_f32_f16_e32 v1, v50
; SI-NEXT: v_cvt_f32_f16_e32 v3, v34
; SI-NEXT: v_cvt_f32_f16_e32 v4, v62
; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0
@@ -31652,7 +31652,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_cvt_f32_f16_e32 v9, v38
; SI-NEXT: v_cvt_f32_f16_e32 v10, v37
; SI-NEXT: v_cvt_f32_f16_e32 v11, v49
-; SI-NEXT: v_cvt_f32_f16_e32 v12, v50
+; SI-NEXT: v_cvt_f32_f16_e32 v12, v51
; SI-NEXT: v_add_f32_e32 v9, 0x38000000, v9
; SI-NEXT: v_add_f32_e32 v10, 0x38000000, v10
; SI-NEXT: v_cvt_f16_f32_e32 v9, v9
@@ -31830,7 +31830,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; SI-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; SI-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; SI-NEXT: s_waitcnt expcnt(0)
@@ -31848,14 +31848,14 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_mov_b32_e32 v56, v43
; SI-NEXT: v_mov_b32_e32 v43, v54
; SI-NEXT: v_mov_b32_e32 v54, v24
+; SI-NEXT: v_mov_b32_e32 v32, v50
; SI-NEXT: v_mov_b32_e32 v50, v34
; SI-NEXT: v_mov_b32_e32 v34, v62
; SI-NEXT: v_mov_b32_e32 v62, v57
; SI-NEXT: v_mov_b32_e32 v57, v44
; SI-NEXT: v_mov_b32_e32 v44, v55
; SI-NEXT: v_mov_b32_e32 v55, v25
-; SI-NEXT: v_mov_b32_e32 v32, v33
-; SI-NEXT: v_mov_b32_e32 v33, v63
+; SI-NEXT: v_mov_b32_e32 v51, v63
; SI-NEXT: v_mov_b32_e32 v63, v58
; SI-NEXT: v_mov_b32_e32 v58, v45
; SI-NEXT: v_mov_b32_e32 v45, v40
@@ -31875,14 +31875,14 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_mov_b32_e32 v40, v45
; SI-NEXT: v_mov_b32_e32 v45, v58
; SI-NEXT: v_mov_b32_e32 v58, v63
-; SI-NEXT: v_mov_b32_e32 v63, v33
-; SI-NEXT: v_mov_b32_e32 v33, v32
+; SI-NEXT: v_mov_b32_e32 v63, v51
; SI-NEXT: v_mov_b32_e32 v25, v55
; SI-NEXT: v_mov_b32_e32 v55, v44
; SI-NEXT: v_mov_b32_e32 v44, v57
; SI-NEXT: v_mov_b32_e32 v57, v62
; SI-NEXT: v_mov_b32_e32 v62, v34
; SI-NEXT: v_mov_b32_e32 v34, v50
+; SI-NEXT: v_mov_b32_e32 v50, v32
; SI-NEXT: v_mov_b32_e32 v24, v54
; SI-NEXT: v_mov_b32_e32 v54, v43
; SI-NEXT: v_mov_b32_e32 v43, v56
@@ -31899,7 +31899,7 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a
; SI-NEXT: v_mov_b32_e32 v46, v59
; SI-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; SI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; SI-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll
index 4f46875076809..e8e20f75964a1 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll
@@ -11977,7 +11977,6 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v61, v40
-; SI-NEXT: v_mov_b32_e32 v40, v44
; SI-NEXT: s_cbranch_vccnz .LBB19_5
; SI-NEXT: ; %bb.4: ; %cmp.true
; SI-NEXT: s_waitcnt expcnt(5)
@@ -12012,7 +12011,7 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v40
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v44
; SI-NEXT: v_mov_b32_e32 v55, v42
; SI-NEXT: v_cvt_f32_f16_e32 v15, v61
; SI-NEXT: v_cvt_f32_f16_e32 v17, v43
@@ -23964,7 +23963,6 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a,
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v61, v40
-; SI-NEXT: v_mov_b32_e32 v40, v44
; SI-NEXT: s_cbranch_vccnz .LBB35_5
; SI-NEXT: ; %bb.4: ; %cmp.true
; SI-NEXT: s_waitcnt expcnt(5)
@@ -23999,7 +23997,7 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a,
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v40
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v44
; SI-NEXT: v_mov_b32_e32 v55, v42
; SI-NEXT: v_cvt_f32_f16_e32 v15, v61
; SI-NEXT: v_cvt_f32_f16_e32 v17, v43
@@ -35116,7 +35114,6 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v61, v40
-; SI-NEXT: v_mov_b32_e32 v40, v44
; SI-NEXT: s_cbranch_vccnz .LBB47_5
; SI-NEXT: ; %bb.4: ; %cmp.true
; SI-NEXT: s_waitcnt expcnt(5)
@@ -35151,7 +35148,7 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v40
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v44
; SI-NEXT: v_mov_b32_e32 v55, v42
; SI-NEXT: v_cvt_f32_f16_e32 v15, v61
; SI-NEXT: v_cvt_f32_f16_e32 v17, v43
@@ -45101,7 +45098,6 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a
; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v61, v40
-; SI-NEXT: v_mov_b32_e32 v40, v44
; SI-NEXT: s_cbranch_vccnz .LBB55_5
; SI-NEXT: ; %bb.4: ; %cmp.true
; SI-NEXT: s_waitcnt expcnt(5)
@@ -45136,7 +45132,7 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a
; SI-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NEXT: v_add_f32_e32 v13, 0x38000000, v13
; SI-NEXT: v_cvt_f16_f32_e32 v13, v13
-; SI-NEXT: v_cvt_f32_f16_e32 v14, v40
+; SI-NEXT: v_cvt_f32_f16_e32 v14, v44
; SI-NEXT: v_mov_b32_e32 v55, v42
; SI-NEXT: v_cvt_f32_f16_e32 v15, v61
; SI-NEXT: v_cvt_f32_f16_e32 v17, v43
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll
index 4aded5da3668a..5770e58992e09 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll
@@ -12409,14 +12409,14 @@ define <6 x i16> @bitcast_v6bf16_to_v6i16(<6 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v9
; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v3, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_add_f32_e32 v5, 0x40c00000, v6
; GFX11-TRUE16-NEXT: v_add3_u32 v6, v10, v9, 0x7fff
; GFX11-TRUE16-NEXT: v_bfe_u32 v10, v1, 16, 1
; GFX11-TRUE16-NEXT: v_add3_u32 v9, v11, v3, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo
; GFX11-TRUE16-NEXT: v_bfe_u32 v12, v5, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_add3_u32 v7, v10, v1, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
@@ -12426,26 +12426,30 @@ define <6 x i16> @bitcast_v6bf16_to_v6i16(<6 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v3, v9, v10 :: v_dual_add_f32 v0, 0x40c00000, v0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v0, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v11, v12, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
; GFX11-TRUE16-NEXT: v_add3_u32 v8, v8, v0, 0x7fff
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v7, v13, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v8, v2, vcc_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v5.h
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v6
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.h
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v0, 16, v2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v3
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v5, 16, v4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v6
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v4
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v2, 16, v3
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v5, 16, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v6, 16, v0
; GFX11-TRUE16-NEXT: .LBB52_2: ; %end
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 97df2a0dbd44b..91cb00bffb04b 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -2026,7 +2026,9 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s6, v2, v[0:1]
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s7, v2, v[1:2]
+; GFX9-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX9-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s7, v2, v[3:4]
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 2
@@ -2068,8 +2070,10 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_readfirstlane_b32 s3, v1
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1064-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s6, v2, s[2:3]
-; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s7, v2, v[1:2]
+; GFX1064-NEXT: v_mov_b32_e32 v3, v1
+; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s7, v2, v[3:4]
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -2109,9 +2113,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_readfirstlane_b32 s3, v1
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1032-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, s6, v2, s[2:3]
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
-; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s7, v2, v[1:2]
+; GFX1032-NEXT: v_mov_b32_e32 v3, v1
+; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s7, v2, v[3:4]
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
@@ -2151,12 +2157,15 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1164-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
-; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
-; GFX1164-NEXT: v_mov_b32_e32 v1, v3
+; GFX1164-NEXT: v_mov_b32_e32 v3, v1
+; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[3:4]
+; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-NEXT: v_mov_b32_e32 v1, v5
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1164-NEXT: s_endpgm
;
@@ -2194,12 +2203,15 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1132-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
-; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
-; GFX1132-NEXT: v_mov_b32_e32 v1, v3
+; GFX1132-NEXT: v_mov_b32_e32 v3, v1
+; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[3:4]
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: v_mov_b32_e32 v1, v5
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1132-NEXT: s_endpgm
;
@@ -2236,11 +2248,13 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1264-NEXT: s_wait_kmcnt 0x0
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1264-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
; GFX1264-NEXT: s_mov_b32 s2, -1
-; GFX1264-NEXT: v_mad_co_u64_u32 v[1:2], null, s5, v2, v[1:2]
+; GFX1264-NEXT: v_mov_b32_e32 v3, v1
+; GFX1264-NEXT: v_mad_co_u64_u32 v[1:2], null, s5, v2, v[3:4]
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
; GFX1264-NEXT: s_endpgm
;
@@ -2276,11 +2290,13 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1232-NEXT: s_wait_kmcnt 0x0
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1232-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
; GFX1232-NEXT: s_mov_b32 s2, -1
-; GFX1232-NEXT: v_mad_co_u64_u32 v[1:2], null, s5, v2, v[1:2]
+; GFX1232-NEXT: v_mov_b32_e32 v3, v1
+; GFX1232-NEXT: v_mad_co_u64_u32 v[1:2], null, s5, v2, v[3:4]
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
; GFX1232-NEXT: s_endpgm
entry:
@@ -6224,13 +6240,14 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX9-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s8, v4, 0
-; GFX9-NEXT: v_readfirstlane_b32 s7, v0
; GFX9-NEXT: v_readfirstlane_b32 s6, v1
+; GFX9-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s9, v4, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v3, s6
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s7, v2
-; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v3, v0, vcc
; GFX9-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], 0
@@ -6288,7 +6305,9 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mad_u64_u32 v[2:3], s[2:3], s8, v4, 0
-; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[2:3], s9, v4, v[3:4]
+; GFX1064-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX1064-NEXT: v_mov_b32_e32 v5, v3
+; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[2:3], s9, v4, v[5:6]
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
; GFX1064-NEXT: v_readfirstlane_b32 s3, v1
; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v2
@@ -6349,8 +6368,10 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s10
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mad_u64_u32 v[2:3], s2, s8, v4, 0
+; GFX1032-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1032-NEXT: v_readfirstlane_b32 s3, v1
-; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s2, s9, v4, v[3:4]
+; GFX1032-NEXT: v_mov_b32_e32 v5, v3
+; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s2, s9, v4, v[5:6]
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v2
; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v3, vcc_lo
@@ -6413,17 +6434,19 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1164-NEXT: ; %bb.3: ; %Flow
; GFX1164-NEXT: s_or_b64 exec, exec, s[12:13]
; GFX1164-NEXT: .LBB10_4: ; %Flow4
-; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX1164-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mad_u64_u32 v[2:3], null, s8, v4, 0
+; GFX1164-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
-; GFX1164-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s9, v4, v[3:4]
+; GFX1164-NEXT: v_mov_b32_e32 v5, v3
+; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v2
; GFX1164-NEXT: s_mov_b32 s2, -1
-; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc
+; GFX1164-NEXT: v_mad_u64_u32 v[7:8], null, s9, v4, v[5:6]
+; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v7, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1164-NEXT: s_endpgm
@@ -6483,13 +6506,15 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s10
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_mad_u64_u32 v[2:3], null, s8, v4, 0
+; GFX1132-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
-; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s9, v4, v[3:4]
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1132-NEXT: v_mov_b32_e32 v5, v3
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v2
; GFX1132-NEXT: s_mov_b32 s2, -1
-; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc_lo
+; GFX1132-NEXT: v_mad_u64_u32 v[7:8], null, s9, v4, v[5:6]
+; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v7, vcc_lo
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1132-NEXT: s_endpgm
@@ -6526,12 +6551,15 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1264-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX1264-NEXT: s_wait_kmcnt 0x0
; GFX1264-NEXT: v_mad_co_u64_u32 v[3:4], null, s4, v2, 0
+; GFX1264-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
+; GFX1264-NEXT: v_mov_b32_e32 v5, v4
; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
; GFX1264-NEXT: s_mov_b32 s2, -1
+; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[5:6]
+; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1264-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -6568,12 +6596,15 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX1232-NEXT: s_wait_kmcnt 0x0
; GFX1232-NEXT: v_mad_co_u64_u32 v[3:4], null, s4, v2, 0
+; GFX1232-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
+; GFX1232-NEXT: v_mov_b32_e32 v5, v4
; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
; GFX1232-NEXT: s_mov_b32 s2, -1
+; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[5:6]
+; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1232-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc_lo
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 23c5f4f5506f3..4638f641ffbc5 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -1794,9 +1794,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, v[0:1]
+; GFX9-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2]
+; GFX9-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[3:4]
; GFX9-NEXT: s_mov_b32 s4, s0
; GFX9-NEXT: s_mov_b32 s5, s1
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1830,9 +1832,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX1064-NEXT: v_readfirstlane_b32 s5, v1
; GFX1064-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1064-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5]
-; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2]
+; GFX1064-NEXT: v_mov_b32_e32 v3, v1
+; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[3:4]
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -1865,9 +1869,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1032-NEXT: v_readfirstlane_b32 s5, v1
; GFX1032-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1032-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5]
-; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s3, v2, v[1:2]
+; GFX1032-NEXT: v_mov_b32_e32 v3, v1
+; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s3, v2, v[3:4]
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -1901,13 +1907,15 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX1164-NEXT: v_readfirstlane_b32 s5, v1
; GFX1164-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1164-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
; GFX1164-NEXT: s_mov_b32 s2, -1
-; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2]
+; GFX1164-NEXT: v_mov_b32_e32 v3, v1
+; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[3:4]
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
-; GFX1164-NEXT: v_mov_b32_e32 v1, v3
+; GFX1164-NEXT: v_mov_b32_e32 v1, v5
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1164-NEXT: s_endpgm
;
@@ -1938,13 +1946,15 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1132-NEXT: v_readfirstlane_b32 s5, v1
; GFX1132-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1132-NEXT: ; implicit-def: $vgpr3_vgpr4
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
; GFX1132-NEXT: s_mov_b32 s2, -1
-; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2]
+; GFX1132-NEXT: v_mov_b32_e32 v3, v1
+; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[3:4]
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
-; GFX1132-NEXT: v_mov_b32_e32 v1, v3
+; GFX1132-NEXT: v_mov_b32_e32 v1, v5
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1132-NEXT: s_endpgm
entry:
@@ -5212,11 +5222,12 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive)
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[8:9], s2, v2, 0
+; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_readfirstlane_b32 s8, v0
+; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: s_mov_b32 s4, s0
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v2, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s8, v3
@@ -5254,8 +5265,10 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive)
; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0
+; GFX1064-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1064-NEXT: v_readfirstlane_b32 s4, v1
-; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[4:5]
+; GFX1064-NEXT: v_mov_b32_e32 v5, v4
+; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[5:6]
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v3
@@ -5291,8 +5304,10 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive)
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s2, s2, v2, 0
+; GFX1032-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1032-NEXT: v_readfirstlane_b32 s4, v1
-; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s2, s3, v2, v[4:5]
+; GFX1032-NEXT: v_mov_b32_e32 v5, v4
+; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s2, s3, v2, v[5:6]
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
@@ -5329,14 +5344,16 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive)
; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0
+; GFX1164-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
; GFX1164-NEXT: v_readfirstlane_b32 s4, v1
-; GFX1164-NEXT: s_waitcnt_depctr 0xfff
-; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
+; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1164-NEXT: v_mov_b32_e32 v5, v4
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
-; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
; GFX1164-NEXT: s_mov_b32 s2, -1
-; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s4, v5, vcc
+; GFX1164-NEXT: v_mad_u64_u32 v[7:8], null, s3, v2, v[5:6]
+; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s4, v7, vcc
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1164-NEXT: s_endpgm
;
@@ -5367,14 +5384,16 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive)
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0
+; GFX1132-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
; GFX1132-NEXT: v_readfirstlane_b32 s4, v1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
+; GFX1132-NEXT: v_mov_b32_e32 v5, v4
; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
-; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
; GFX1132-NEXT: s_mov_b32 s2, -1
-; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s4, v5, vcc_lo
+; GFX1132-NEXT: v_mad_u64_u32 v[7:8], null, s3, v2, v[5:6]
+; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s4, v7, vcc_lo
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX1132-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 44c719f3635c8..4db3233aa79f0 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -10053,10 +10053,11 @@ define <3 x bfloat> @v_fadd_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -14733,10 +14734,11 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -15545,10 +15547,11 @@ define <3 x bfloat> @v_fmul_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -20539,10 +20542,11 @@ define <3 x bfloat> @v_minnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -25020,10 +25024,11 @@ define <3 x bfloat> @v_maxnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -34766,17 +34771,19 @@ define bfloat @v_sitofp_i16_to_bf16(i16 %x) {
;
; GFX11TRUE16-LABEL: v_sitofp_i16_to_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
; GFX11TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
; GFX11TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -35085,9 +35092,10 @@ define <3 x bfloat> @v_sitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v0, 0x7fff
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
@@ -35701,27 +35709,28 @@ define <3 x bfloat> @v_sitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX11TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX11TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX11TRUE16-NEXT: v_cvt_f32_i32_e32 v3, v1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX11TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
; GFX11TRUE16-NEXT: v_bfe_u32 v4, v2, 16, 1
; GFX11TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
-; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX11TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
+; GFX11TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
; GFX11TRUE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
-; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
+; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
+; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_sitofp_v3i32_to_v3bf16:
@@ -36853,9 +36862,10 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v2, 0x7fff
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
@@ -37901,10 +37911,11 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -38524,27 +38535,28 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX11TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX11TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX11TRUE16-NEXT: v_cvt_f32_u32_e32 v3, v1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX11TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
; GFX11TRUE16-NEXT: v_bfe_u32 v4, v2, 16, 1
; GFX11TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
-; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX11TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
+; GFX11TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
; GFX11TRUE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
-; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
+; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
+; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_uitofp_v3i32_to_v3bf16:
@@ -39472,22 +39484,24 @@ define <3 x bfloat> @v_uitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11TRUE16-NEXT: v_ldexp_f32 v2, v2, v5
; GFX11TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1
; GFX11TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
-; GFX11TRUE16-NEXT: v_bfe_u32 v6, v2, 16, 1
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
; GFX11TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
-; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v2
+; GFX11TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX11TRUE16-NEXT: v_bfe_u32 v6, v2, 16, 1
; GFX11TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
-; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v2, 0x7fff
+; GFX11TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v2
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_add3_u32 v6, v6, v2, 0x7fff
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -46402,10 +46416,11 @@ define <3 x bfloat> @v_fma_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b, <3 x bfloat>
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v0, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_fma_v3bf16:
@@ -47350,10 +47365,11 @@ define <3 x bfloat> @v_fmuladd_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b, <3 x bfl
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v0, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_fmuladd_v3bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
index 53b2542cf9a7e..3c76bb613d21f 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
@@ -2381,18 +2381,20 @@ define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) {
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
+; SDAG-NEXT: ; implicit-def: $vgpr11_vgpr12
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v8, v2
-; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2
+; SDAG-NEXT: v_mov_b32_e32 v11, v2
; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1]
; SDAG-NEXT: v_lshrrev_b32_e32 v14, 8, v0
; SDAG-NEXT: v_lshrrev_b32_e32 v13, 16, v0
-; SDAG-NEXT: v_lshrrev_b64 v[11:12], 24, v[8:9]
+; SDAG-NEXT: v_lshrrev_b64 v[11:12], 24, v[11:12]
; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2
; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; SDAG-NEXT: v_mov_b32_e32 v4, v1
+; SDAG-NEXT: v_mov_b32_e32 v8, v2
; SDAG-NEXT: v_mov_b32_e32 v1, v14
; SDAG-NEXT: v_mov_b32_e32 v2, v13
; SDAG-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index 861621bd92af1..fa5b94fe6cf28 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -48,10 +48,11 @@ define void @undef_lo_v2i16(i16 %arg0) {
;
; GFX11-TRUE16-LABEL: undef_lo_v2i16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: ;;#ASMSTART
-; GFX11-TRUE16-NEXT: ; use v0
+; GFX11-TRUE16-NEXT: ; use v1
; GFX11-TRUE16-NEXT: ;;#ASMEND
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x i16> poison, i16 %arg0, i32 1
@@ -99,10 +100,11 @@ define void @undef_lo_v2f16(half %arg0) {
;
; GFX11-TRUE16-LABEL: undef_lo_v2f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: ;;#ASMSTART
-; GFX11-TRUE16-NEXT: ; use v0
+; GFX11-TRUE16-NEXT: ; use v1
; GFX11-TRUE16-NEXT: ;;#ASMEND
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x half> poison, half %arg0, i32 1
@@ -157,10 +159,11 @@ define void @undef_lo_op_v2f16(half %arg0) {
;
; GFX11-TRUE16-LABEL: undef_lo_op_v2f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v1, 1.0 op_sel_hi:[1,0]
; GFX11-TRUE16-NEXT: ;;#ASMSTART
; GFX11-TRUE16-NEXT: ; use v0
; GFX11-TRUE16-NEXT: ;;#ASMEND
@@ -240,10 +243,11 @@ define void @undef_lo_op_v2i16(i16 %arg0) {
;
; GFX11-TRUE16-SDAG-LABEL: undef_lo_op_v2i16:
; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-TRUE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
; GFX11-TRUE16-SDAG-NEXT: ; use v0
; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
@@ -251,10 +255,11 @@ define void @undef_lo_op_v2i16(i16 %arg0) {
;
; GFX11-TRUE16-GISEL-LABEL: undef_lo_op_v2i16:
; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-GISEL-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-GISEL-NEXT: v_pk_add_u16 v0, 0x630063, v0
+; GFX11-TRUE16-GISEL-NEXT: v_pk_add_u16 v0, 0x630063, v1
; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
; GFX11-TRUE16-GISEL-NEXT: ; use v0
; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
@@ -306,10 +311,11 @@ define void @undef_lo3_v4i16(i16 %arg0) {
;
; GFX11-TRUE16-LABEL: undef_lo3_v4i16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: ;;#ASMSTART
-; GFX11-TRUE16-NEXT: ; use v[0:1]
+; GFX11-TRUE16-NEXT: ; use v[1:2]
; GFX11-TRUE16-NEXT: ;;#ASMEND
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x i16> poison, i16 %arg0, i32 1
@@ -358,10 +364,11 @@ define void @undef_lo3_v4f16(half %arg0) {
;
; GFX11-TRUE16-LABEL: undef_lo3_v4f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: ;;#ASMSTART
-; GFX11-TRUE16-NEXT: ; use v[0:1]
+; GFX11-TRUE16-NEXT: ; use v[1:2]
; GFX11-TRUE16-NEXT: ;;#ASMEND
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x half> poison, half %arg0, i32 1
@@ -412,6 +419,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
;
; GFX11-TRUE16-SDAG-LABEL: undef_lo2_v4i16:
; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -515,13 +523,31 @@ define void @undef_hi_v2i16(i16 %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: undef_hi_v2i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use v0
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_hi_v2i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi_v2i16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v1
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi_v2i16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v0
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x i16> poison, i16 %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
ret void
@@ -553,13 +579,31 @@ define void @undef_hi_v2f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: undef_hi_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use v0
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_hi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi_v2f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v1
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi_v2f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v0
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x half> poison, half %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
ret void
@@ -598,14 +642,35 @@ define void @undef_hi_op_v2f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: undef_hi_op_v2f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use v0
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_hi_op_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi_op_v2f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_pk_add_f16 v0, v1, 1.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v0
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi_op_v2f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v0
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x half> poison, half %arg0, i32 0
%op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
@@ -674,8 +739,11 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
;
; GFX11-TRUE16-SDAG-LABEL: undef_hi_op_v2i16:
; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v1 op_sel_hi:[0,1]
; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
; GFX11-TRUE16-SDAG-NEXT: ; use v0
; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
@@ -722,13 +790,31 @@ define void @undef_hi3_v4i16(i16 %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: undef_hi3_v4i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use v[0:1]
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_hi3_v4i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v[0:1]
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi3_v4i16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v[1:2]
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi3_v4i16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v[0:1]
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <4 x i16> poison, i16 %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
ret void
@@ -761,13 +847,31 @@ define void @undef_hi3_v4f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: undef_hi3_v4f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use v[0:1]
-; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_hi3_v4f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v[0:1]
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi3_v4f16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v[1:2]
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi3_v4f16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v[0:1]
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <4 x half> poison, half %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 26f204f29f5a4..b80e31c120c58 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -265,11 +265,20 @@ define float @v_uitofp_i8_to_f32(i8 %arg0) nounwind {
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_uitofp_i8_to_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_uitofp_i8_to_f32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cvt_f32_ubyte0_e32 v0, v1
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_uitofp_i8_to_f32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cvt = uitofp i8 %arg0 to float
ret float %cvt
}
@@ -301,6 +310,7 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind {
;
; GFX11-TRUE16-LABEL: v_uitofp_v2i8_to_v2f32:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
diff --git a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
index 54cbc25043db3..0a848171ba032 100644
--- a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
@@ -27,9 +27,11 @@ define noundef i64 @srem64_3(i64 noundef %i) {
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, 3, v[4:5]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -57,10 +59,10 @@ define noundef i64 @srem64_3(i64 noundef %i) {
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[6:7]
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
@@ -82,10 +84,12 @@ define noundef i64 @srem64_3(i64 noundef %i) {
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
-; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v3, vcc_lo
+; GFX1030-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v3, vcc_lo
+; GFX1030-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
+; GFX1030-NEXT: v_mov_b32_e32 v4, v3
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v6, 3, v[4:5]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -114,9 +118,11 @@ define noundef i64 @srem64_6(i64 noundef %i) {
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, 3, v[4:5]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -144,10 +150,10 @@ define noundef i64 @srem64_6(i64 noundef %i) {
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[6:7]
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
@@ -169,10 +175,12 @@ define noundef i64 @srem64_6(i64 noundef %i) {
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
-; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v3, vcc_lo
+; GFX1030-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v3, vcc_lo
+; GFX1030-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
+; GFX1030-NEXT: v_mov_b32_e32 v4, v3
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v6, 3, v[4:5]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -188,6 +196,7 @@ define noundef i64 @urem64_3(i64 noundef %i) {
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
+; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
@@ -196,9 +205,9 @@ define noundef i64 @urem64_3(i64 noundef %i) {
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, 3, 0
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX9-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, 3, v[2:3]
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
+; GFX9-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, v[6:7]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -220,9 +229,10 @@ define noundef i64 @urem64_3(i64 noundef %i) {
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX942-NEXT: v_lshrrev_b32_e32 v2, 1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, v[6:7]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
@@ -233,6 +243,7 @@ define noundef i64 @urem64_3(i64 noundef %i) {
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
+; GFX1030-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
@@ -240,11 +251,11 @@ define noundef i64 @urem64_3(i64 noundef %i) {
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v3, v2, 1
-; GFX1030-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v2, 3, 0
-; GFX1030-NEXT: v_mov_b32_e32 v2, v5
+; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 1, v3
+; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
-; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v3, 3, v[2:3]
+; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, v[6:7]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v2, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -260,6 +271,7 @@ define noundef i64 @urem64_6(i64 noundef %i) {
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
+; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v2, v4
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
@@ -268,9 +280,9 @@ define noundef i64 @urem64_6(i64 noundef %i) {
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, 6, 0
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v3
-; GFX9-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, 6, v[2:3]
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 2, v3
+; GFX9-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 6, v[6:7]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -292,9 +304,10 @@ define noundef i64 @urem64_6(i64 noundef %i) {
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_lshrrev_b32_e32 v3, 2, v3
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 6, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX942-NEXT: v_lshrrev_b32_e32 v2, 2, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 6, v[6:7]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
@@ -305,6 +318,7 @@ define noundef i64 @urem64_6(i64 noundef %i) {
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
+; GFX1030-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v2, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[2:3]
@@ -312,11 +326,11 @@ define noundef i64 @urem64_6(i64 noundef %i) {
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v3, v2, 2
-; GFX1030-NEXT: v_lshrrev_b32_e32 v3, 2, v3
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v2, 6, 0
-; GFX1030-NEXT: v_mov_b32_e32 v2, v5
+; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
-; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v3, 6, v[2:3]
+; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 6, v[6:7]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v2, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -1035,9 +1049,11 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v5, vcc
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, s6, v[4:5]
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -1070,10 +1086,10 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[4:5]
; GFX942-NEXT: s_brev_b32 s2, -2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[6:7]
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
@@ -1102,10 +1118,12 @@ define noundef i64 @srem64_i32max(i64 noundef %i) {
; GFX1030-NEXT: v_ashrrev_i64 v[4:5], 30, v[2:3]
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, 0, v5, vcc_lo
+; GFX1030-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v5, vcc_lo
+; GFX1030-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
+; GFX1030-NEXT: v_mov_b32_e32 v4, v3
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v6, v[4:5]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -1221,11 +1239,13 @@ define noundef i64 @urem64_i32max(i64 noundef %i) {
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v3, vcc
-; GFX9-NEXT: v_alignbit_b32 v2, v4, v2, 30
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v5, v3, vcc
+; GFX9-NEXT: v_alignbit_b32 v2, v6, v2, 30
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 30, v4
-; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 30, v6
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, s6, v[4:5]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -1251,9 +1271,10 @@ define noundef i64 @urem64_i32max(i64 noundef %i) {
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 30
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_lshrrev_b32_e32 v3, 30, v3
-; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX942-NEXT: v_lshrrev_b32_e32 v2, 30, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, s2, v[6:7]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
@@ -1274,12 +1295,14 @@ define noundef i64 @urem64_i32max(i64 noundef %i) {
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
; GFX1030-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e64 v4, null, v5, v3, vcc_lo
-; GFX1030-NEXT: v_alignbit_b32 v2, v4, v2, 30
-; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 30, v4
+; GFX1030-NEXT: v_add_co_ci_u32_e64 v6, null, v5, v3, vcc_lo
+; GFX1030-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1030-NEXT: v_alignbit_b32 v2, v6, v2, 30
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
+; GFX1030-NEXT: v_mov_b32_e32 v4, v3
+; GFX1030-NEXT: v_lshrrev_b32_e32 v3, 30, v6
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v3, v[4:5]
; GFX1030-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 6c8207a4b1396..98a2af36856f7 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1159,8 +1159,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_subb_co_u32_e32 v9, vcc, v1, v16, vcc
; GFX9-G-NEXT: v_ashrrev_i32_e32 v17, 31, v7
; GFX9-G-NEXT: v_xor_b32_e32 v3, v16, v3
-; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v2, v16, vcc
-; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v16, vcc
+; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v2, v16, vcc
+; GFX9-G-NEXT: v_subb_co_u32_e32 v13, vcc, v3, v16, vcc
; GFX9-G-NEXT: v_xor_b32_e32 v0, v17, v4
; GFX9-G-NEXT: v_xor_b32_e32 v1, v17, v5
; GFX9-G-NEXT: v_sub_co_u32_e32 v18, vcc, v0, v17
@@ -1172,8 +1172,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_or_b32_e32 v0, v18, v4
; GFX9-G-NEXT: v_or_b32_e32 v1, v19, v5
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GFX9-G-NEXT: v_or_b32_e32 v0, v8, v10
-; GFX9-G-NEXT: v_or_b32_e32 v1, v9, v11
+; GFX9-G-NEXT: v_or_b32_e32 v0, v8, v12
+; GFX9-G-NEXT: v_or_b32_e32 v1, v9, v13
; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v18
; GFX9-G-NEXT: v_ffbh_u32_e32 v0, v19
@@ -1189,11 +1189,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[6:7]
; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v9
; GFX9-G-NEXT: v_add_u32_e32 v2, 32, v2
-; GFX9-G-NEXT: v_ffbh_u32_e32 v3, v10
+; GFX9-G-NEXT: v_ffbh_u32_e32 v3, v12
; GFX9-G-NEXT: v_min_u32_e32 v1, v1, v2
-; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v11
+; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v13
; GFX9-G-NEXT: v_add_u32_e32 v3, 32, v3
-; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
+; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[12:13]
; GFX9-G-NEXT: v_add_u32_e32 v1, 64, v1
; GFX9-G-NEXT: v_min_u32_e32 v2, v2, v3
; GFX9-G-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[6:7]
@@ -1220,8 +1220,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14
@@ -1236,23 +1236,23 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, 0, v2, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v3, vcc
; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1
-; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, 0x7f, v0
-; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v12
+; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, 0x7f, v0
+; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v10
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
-; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v12, v[10:11]
-; GFX9-G-NEXT: v_add_u32_e32 v13, 0xffffffc0, v12
-; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v12, v[8:9]
+; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v10, v[12:13]
+; GFX9-G-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10
+; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v10, v[8:9]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], v13, v[8:9]
-; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12
+; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], v11, v[8:9]
+; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc
; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
-; GFX9-G-NEXT: v_cndmask_b32_e32 v12, v0, v10, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e32 v13, v1, v11, vcc
+; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
+; GFX9-G-NEXT: v_cndmask_b32_e32 v10, v0, v12, vcc
+; GFX9-G-NEXT: v_cndmask_b32_e32 v11, v1, v13, vcc
; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX9-G-NEXT: v_mov_b32_e32 v0, s8
; GFX9-G-NEXT: v_mov_b32_e32 v1, s9
@@ -1264,12 +1264,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader
; GFX9-G-NEXT: v_sub_u32_e32 v2, 64, v20
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v20, v[8:9]
-; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
+; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[12:13]
; GFX9-G-NEXT: v_add_u32_e32 v24, 0xffffffc0, v20
-; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[10:11]
+; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[12:13]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
+; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v24, v[12:13]
; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v20
; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -1279,8 +1279,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: s_mov_b64 s[8:9], 0
; GFX9-G-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v20
; GFX9-G-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v19, vcc
-; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, v8, s[4:5]
-; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, v9, s[4:5]
+; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v0, v8, s[4:5]
+; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v1, v9, s[4:5]
; GFX9-G-NEXT: v_addc_co_u32_e32 v26, vcc, -1, v4, vcc
; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX9-G-NEXT: v_mov_b32_e32 v0, s8
@@ -1295,21 +1295,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_lshrrev_b32_e32 v8, 31, v7
; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3
-; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[10:11]
-; GFX9-G-NEXT: v_lshrrev_b32_e32 v10, 31, v13
+; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[12:13]
+; GFX9-G-NEXT: v_lshrrev_b32_e32 v12, 31, v11
; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 1, v[14:15]
-; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v10
-; GFX9-G-NEXT: v_lshrrev_b32_e32 v14, 31, v11
-; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v24, v2
+; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v12
+; GFX9-G-NEXT: v_lshrrev_b32_e32 v14, 31, v13
+; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, v24, v2
; GFX9-G-NEXT: v_or_b32_e32 v0, v0, v14
-; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v25, v3, vcc
-; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v26, v0, vcc
-; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v27, v1, vcc
-; GFX9-G-NEXT: v_ashrrev_i32_e32 v28, 31, v10
-; GFX9-G-NEXT: v_and_b32_e32 v10, v28, v18
-; GFX9-G-NEXT: v_and_b32_e32 v11, v28, v19
-; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v10
-; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v11, vcc
+; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v25, v3, vcc
+; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v26, v0, vcc
+; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v27, v1, vcc
+; GFX9-G-NEXT: v_ashrrev_i32_e32 v28, 31, v12
+; GFX9-G-NEXT: v_and_b32_e32 v12, v28, v18
+; GFX9-G-NEXT: v_and_b32_e32 v13, v28, v19
+; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, v2, v12
+; GFX9-G-NEXT: v_subb_co_u32_e32 v13, vcc, v3, v13, vcc
; GFX9-G-NEXT: v_and_b32_e32 v2, v28, v4
; GFX9-G-NEXT: v_and_b32_e32 v3, v28, v5
; GFX9-G-NEXT: v_subb_co_u32_e32 v14, vcc, v0, v2, vcc
@@ -1318,14 +1318,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, -1, v22, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v23, vcc
-; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13]
+; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], 1, v[10:11]
; GFX9-G-NEXT: v_or_b32_e32 v0, v20, v22
; GFX9-G-NEXT: v_or_b32_e32 v1, v21, v23
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v8
+; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v8
; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v28
-; GFX9-G-NEXT: v_mov_b32_e32 v0, v8
+; GFX9-G-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
+; GFX9-G-NEXT: v_mov_b32_e32 v0, v8
; GFX9-G-NEXT: v_mov_b32_e32 v1, v9
; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9]
; GFX9-G-NEXT: s_cbranch_execnz .LBB0_3
@@ -1334,9 +1335,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: .LBB0_5: ; %Flow2
; GFX9-G-NEXT: s_or_b64 exec, exec, s[12:13]
; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[6:7]
-; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13]
+; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], 1, v[10:11]
; GFX9-G-NEXT: v_lshrrev_b32_e32 v4, 31, v7
-; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v4
+; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v4
; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3
; GFX9-G-NEXT: .LBB0_6: ; %Flow3
@@ -1345,9 +1346,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_xor_b32_e32 v0, v6, v3
; GFX9-G-NEXT: v_xor_b32_e32 v1, v7, v3
; GFX9-G-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-G-NEXT: v_xor_b32_e32 v2, v12, v3
+; GFX9-G-NEXT: v_xor_b32_e32 v2, v10, v3
; GFX9-G-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-G-NEXT: v_xor_b32_e32 v4, v13, v3
+; GFX9-G-NEXT: v_xor_b32_e32 v4, v11, v3
; GFX9-G-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v3, vcc
; GFX9-G-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
; GFX9-G-NEXT: s_setpc_b64 s[30:31]
@@ -3391,14 +3392,15 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_addc_co_u32_e32 v19, vcc, -1, v19, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v20, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
+; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9]
; GFX9-G-NEXT: v_or_b32_e32 v10, v18, v20
; GFX9-G-NEXT: v_or_b32_e32 v11, v19, v21
-; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9]
; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; GFX9-G-NEXT: v_or_b32_e32 v8, v8, v0
; GFX9-G-NEXT: v_and_b32_e32 v0, 1, v12
-; GFX9-G-NEXT: v_mov_b32_e32 v11, v1
+; GFX9-G-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
+; GFX9-G-NEXT: v_mov_b32_e32 v11, v1
; GFX9-G-NEXT: v_mov_b32_e32 v10, v0
; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9]
; GFX9-G-NEXT: s_cbranch_execnz .LBB1_3
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 5134159e3e406..82052f796daab 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -600,6 +600,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
; GISEL-NEXT: v_or_b32_e32 v8, v8, v36
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GISEL-NEXT: v_mov_b32_e32 v0, v22
; GISEL-NEXT: v_mov_b32_e32 v1, v23
; GISEL-NEXT: s_andn2_b64 exec, exec, s[8:9]
@@ -627,33 +628,33 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_xor_b32_e32 v3, v18, v7
; GISEL-NEXT: v_xor_b32_e32 v4, v19, v12
; GISEL-NEXT: v_xor_b32_e32 v5, v19, v13
-; GISEL-NEXT: v_xor_b32_e32 v14, v19, v14
-; GISEL-NEXT: v_xor_b32_e32 v15, v19, v15
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v18
-; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v1, v18, vcc
+; GISEL-NEXT: v_xor_b32_e32 v6, v19, v14
+; GISEL-NEXT: v_xor_b32_e32 v7, v19, v15
+; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v18
+; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v1, v18, vcc
; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], v4, v19
; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], v5, v19, s[4:5]
-; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v2, v18, vcc
-; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v18, vcc
-; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v14, v19, s[4:5]
-; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v15, v19, vcc
-; GISEL-NEXT: v_ffbh_u32_e32 v14, v21
-; GISEL-NEXT: v_ffbh_u32_e32 v15, v20
-; GISEL-NEXT: v_ffbh_u32_e32 v16, v7
-; GISEL-NEXT: v_ffbh_u32_e32 v17, v6
+; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v2, v18, vcc
+; GISEL-NEXT: v_subb_u32_e32 v15, vcc, v3, v18, vcc
+; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v6, v19, s[4:5]
+; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v7, v19, vcc
+; GISEL-NEXT: v_ffbh_u32_e32 v6, v21
+; GISEL-NEXT: v_ffbh_u32_e32 v7, v20
+; GISEL-NEXT: v_ffbh_u32_e32 v16, v13
+; GISEL-NEXT: v_ffbh_u32_e32 v17, v12
; GISEL-NEXT: v_or_b32_e32 v0, v20, v4
; GISEL-NEXT: v_or_b32_e32 v1, v21, v5
-; GISEL-NEXT: v_or_b32_e32 v2, v6, v12
-; GISEL-NEXT: v_or_b32_e32 v3, v7, v13
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, 32, v15
+; GISEL-NEXT: v_or_b32_e32 v2, v12, v14
+; GISEL-NEXT: v_or_b32_e32 v3, v13, v15
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 32, v7
; GISEL-NEXT: v_ffbh_u32_e32 v26, v5
; GISEL-NEXT: v_ffbh_u32_e32 v27, v4
; GISEL-NEXT: v_add_i32_e32 v17, vcc, 32, v17
-; GISEL-NEXT: v_ffbh_u32_e32 v28, v13
-; GISEL-NEXT: v_ffbh_u32_e32 v29, v12
+; GISEL-NEXT: v_ffbh_u32_e32 v28, v15
+; GISEL-NEXT: v_ffbh_u32_e32 v29, v14
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
-; GISEL-NEXT: v_min_u32_e32 v0, v14, v15
+; GISEL-NEXT: v_min_u32_e32 v0, v6, v7
; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v27
; GISEL-NEXT: v_min_u32_e32 v2, v16, v17
; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v29
@@ -662,35 +663,35 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 64, v2
; GISEL-NEXT: v_min_u32_e32 v3, v28, v3
; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_xor_b32_e32 v6, 0x7f, v2
; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v10, v10, v0
-; GISEL-NEXT: v_or_b32_e32 v11, v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_or_b32_e32 v6, v6, v0
+; GISEL-NEXT: v_or_b32_e32 v7, v3, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v11, v14, v15
-; GISEL-NEXT: v_and_b32_e32 v14, 1, v11
-; GISEL-NEXT: v_or_b32_e32 v10, v11, v10
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v16, 1, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_or_b32_e32 v7, v16, v10
+; GISEL-NEXT: v_and_b32_e32 v10, 1, v7
+; GISEL-NEXT: v_or_b32_e32 v7, v7, v6
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v6, v12, 0, vcc
+; GISEL-NEXT: v_and_b32_e32 v16, 1, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, v13, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v14, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v15, 0, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
@@ -702,23 +703,23 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_not_b32_e32 v2, 63
; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v30, v2
-; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v30
-; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v30
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v30
+; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v30, v2
+; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], 64, v30
+; GISEL-NEXT: v_lshl_b64 v[0:1], v[12:13], v30
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], v30
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
-; GISEL-NEXT: v_lshr_b64 v[10:11], v[6:7], v10
-; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v14
+; GISEL-NEXT: v_lshr_b64 v[10:11], v[12:13], v7
+; GISEL-NEXT: v_lshl_b64 v[16:17], v[12:13], v6
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
-; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc
; GISEL-NEXT: v_or_b32_e32 v0, v10, v2
; GISEL-NEXT: v_or_b32_e32 v1, v11, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v12, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v1, v13, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v1, v15, vcc
; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v0, s8
; GISEL-NEXT: v_mov_b32_e32 v1, s9
@@ -730,101 +731,102 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 64, v26
-; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v26
-; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v26
+; GISEL-NEXT: v_lshr_b64 v[0:1], v[14:15], v26
+; GISEL-NEXT: v_lshr_b64 v[2:3], v[12:13], v26
; GISEL-NEXT: s_mov_b64 s[4:5], 0
; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v20
; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v21, vcc
-; GISEL-NEXT: v_lshl_b64 v[16:17], v[12:13], v16
-; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v32
+; GISEL-NEXT: v_lshl_b64 v[16:17], v[14:15], v16
+; GISEL-NEXT: v_lshr_b64 v[14:15], v[14:15], v32
; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v4, vcc
; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v5, vcc
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
; GISEL-NEXT: v_or_b32_e32 v3, v3, v17
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v14, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v15, v3, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v2, v6, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v3, v7, vcc
-; GISEL-NEXT: v_mov_b32_e32 v7, 0
+; GISEL-NEXT: v_cndmask_b32_e32 v14, v2, v12, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v15, v3, v13, vcc
+; GISEL-NEXT: v_mov_b32_e32 v13, 0
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
; GISEL-NEXT: v_mov_b32_e32 v2, s6
; GISEL-NEXT: v_mov_b32_e32 v3, s7
; GISEL-NEXT: .LBB0_9: ; %udiv-do-while
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
+; GISEL-NEXT: v_lshl_b64 v[34:35], v[14:15], 1
; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v13
-; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v11
-; GISEL-NEXT: v_lshl_b64 v[12:13], v[14:15], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v2, 31, v15
+; GISEL-NEXT: v_or_b32_e32 v16, v16, v2
+; GISEL-NEXT: v_lshrrev_b32_e32 v2, 31, v11
+; GISEL-NEXT: v_or_b32_e32 v14, v34, v2
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], 1
; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v15
+; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v7
+; GISEL-NEXT: v_or_b32_e32 v10, v10, v6
+; GISEL-NEXT: v_or_b32_e32 v6, v0, v2
+; GISEL-NEXT: v_or_b32_e32 v7, v1, v3
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v14
+; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v35, vcc
+; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v32, v16, vcc
+; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v33, v17, vcc
+; GISEL-NEXT: v_ashrrev_i32_e32 v15, 31, v0
+; GISEL-NEXT: v_and_b32_e32 v12, 1, v15
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT: v_mov_b32_e32 v0, v12
+; GISEL-NEXT: v_mov_b32_e32 v1, v13
+; GISEL-NEXT: v_and_b32_e32 v2, v15, v20
+; GISEL-NEXT: v_and_b32_e32 v3, v15, v21
+; GISEL-NEXT: v_and_b32_e32 v12, v15, v4
+; GISEL-NEXT: v_and_b32_e32 v34, v15, v5
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v14, v2
+; GISEL-NEXT: v_subb_u32_e32 v15, vcc, v35, v3, vcc
+; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v12, vcc
+; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v34, vcc
; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v26
; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc
-; GISEL-NEXT: v_or_b32_e32 v16, v16, v6
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v34
-; GISEL-NEXT: v_or_b32_e32 v10, v10, v14
-; GISEL-NEXT: v_or_b32_e32 v14, v0, v12
-; GISEL-NEXT: v_or_b32_e32 v15, v1, v13
; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc
; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v2
-; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v3, vcc
-; GISEL-NEXT: v_or_b32_e32 v0, v26, v28
-; GISEL-NEXT: v_or_b32_e32 v1, v27, v29
-; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v32, v16, vcc
-; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v33, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v6
+; GISEL-NEXT: v_or_b32_e32 v2, v26, v28
+; GISEL-NEXT: v_or_b32_e32 v3, v27, v29
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GISEL-NEXT: v_and_b32_e32 v6, 1, v0
-; GISEL-NEXT: v_and_b32_e32 v12, v0, v20
-; GISEL-NEXT: v_and_b32_e32 v13, v0, v21
-; GISEL-NEXT: v_and_b32_e32 v34, v0, v4
-; GISEL-NEXT: v_and_b32_e32 v35, v0, v5
-; GISEL-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-NEXT: v_mov_b32_e32 v1, v7
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v12
-; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v13, vcc
-; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v34, vcc
-; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GISEL-NEXT: s_cbranch_execnz .LBB0_9
; GISEL-NEXT: ; %bb.10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB0_11: ; %Flow11
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], 1
; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v15
+; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v7
; GISEL-NEXT: v_or_b32_e32 v10, v10, v4
-; GISEL-NEXT: v_or_b32_e32 v14, v0, v2
-; GISEL-NEXT: v_or_b32_e32 v15, v1, v3
+; GISEL-NEXT: v_or_b32_e32 v6, v0, v2
+; GISEL-NEXT: v_or_b32_e32 v7, v1, v3
; GISEL-NEXT: .LBB0_12: ; %Flow12
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: v_xor_b32_e32 v3, v25, v24
-; GISEL-NEXT: v_xor_b32_e32 v7, v19, v18
+; GISEL-NEXT: v_xor_b32_e32 v12, v19, v18
; GISEL-NEXT: v_xor_b32_e32 v0, v22, v3
; GISEL-NEXT: v_xor_b32_e32 v1, v23, v3
; GISEL-NEXT: v_xor_b32_e32 v2, v8, v3
-; GISEL-NEXT: v_xor_b32_e32 v6, v9, v3
-; GISEL-NEXT: v_xor_b32_e32 v4, v14, v7
-; GISEL-NEXT: v_xor_b32_e32 v5, v15, v7
-; GISEL-NEXT: v_xor_b32_e32 v8, v10, v7
-; GISEL-NEXT: v_xor_b32_e32 v9, v11, v7
+; GISEL-NEXT: v_xor_b32_e32 v8, v9, v3
+; GISEL-NEXT: v_xor_b32_e32 v4, v6, v12
+; GISEL-NEXT: v_xor_b32_e32 v5, v7, v12
+; GISEL-NEXT: v_xor_b32_e32 v6, v10, v12
+; GISEL-NEXT: v_xor_b32_e32 v7, v11, v12
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v7
-; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v7, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v12
+; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v12, s[4:5]
; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc
-; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v8, v7, s[4:5]
-; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v9, v7, vcc
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v8, v3, vcc
+; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v12, s[4:5]
+; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v12, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%shl = sdiv <2 x i128> %lhs, %rhs
ret <2 x i128> %shl
@@ -1356,6 +1358,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v21, vcc
; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc
; GISEL-NEXT: v_or_b32_e32 v2, v2, v34
+; GISEL-NEXT: ; implicit-def: $vgpr18_vgpr19_vgpr20_vgpr21
; GISEL-NEXT: v_mov_b32_e32 v19, v1
; GISEL-NEXT: v_mov_b32_e32 v18, v0
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
@@ -1498,14 +1501,15 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], 1
; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v7
-; GISEL-NEXT: v_lshrrev_b32_e32 v30, 31, v1
+; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v1
; GISEL-NEXT: v_lshl_b64 v[6:7], v[9:10], 1
; GISEL-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; GISEL-NEXT: v_lshrrev_b32_e32 v9, 31, v10
+; GISEL-NEXT: ; implicit-def: $vgpr30_vgpr31_vgpr32_vgpr33
; GISEL-NEXT: v_add_i32_e32 v8, vcc, -1, v8
; GISEL-NEXT: v_addc_u32_e32 v11, vcc, -1, v11, vcc
; GISEL-NEXT: v_or_b32_e32 v16, v16, v4
-; GISEL-NEXT: v_or_b32_e32 v22, v22, v30
+; GISEL-NEXT: v_or_b32_e32 v22, v22, v34
; GISEL-NEXT: v_or_b32_e32 v0, v0, v9
; GISEL-NEXT: v_or_b32_e32 v9, v20, v6
; GISEL-NEXT: v_or_b32_e32 v10, v21, v7
@@ -2023,6 +2027,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-LABEL: v_srem_v2i128_vv:
; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: v_ashrrev_i32_e32 v28, 31, v3
; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v11
; GISEL-NEXT: v_mov_b32_e32 v19, 0x7f
@@ -2198,6 +2204,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v2, v48, vcc
; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v25, v49, vcc
; GISEL-NEXT: v_or_b32_e32 v18, v18, v39
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GISEL-NEXT: v_mov_b32_e32 v0, v22
; GISEL-NEXT: v_mov_b32_e32 v1, v23
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
@@ -2358,16 +2365,17 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1
; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v21
; GISEL-NEXT: v_lshl_b64 v[52:53], v[24:25], 1
-; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v25
-; GISEL-NEXT: v_lshrrev_b32_e32 v25, 31, v15
+; GISEL-NEXT: v_lshl_b64 v[54:55], v[26:27], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v40, 31, v25
+; GISEL-NEXT: v_lshrrev_b32_e32 v41, 31, v15
; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
+; GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25_vgpr26_vgpr27
; GISEL-NEXT: v_add_i32_e32 v36, vcc, -1, v36
; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc
; GISEL-NEXT: v_or_b32_e32 v20, v0, v2
; GISEL-NEXT: v_or_b32_e32 v21, v1, v3
-; GISEL-NEXT: v_or_b32_e32 v2, v26, v24
-; GISEL-NEXT: v_or_b32_e32 v3, v52, v25
+; GISEL-NEXT: v_or_b32_e32 v2, v54, v40
+; GISEL-NEXT: v_or_b32_e32 v3, v52, v41
; GISEL-NEXT: v_or_b32_e32 v14, v14, v22
; GISEL-NEXT: v_addc_u32_e32 v38, vcc, -1, v38, vcc
; GISEL-NEXT: v_addc_u32_e32 v39, vcc, -1, v39, vcc
@@ -2376,7 +2384,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v0, v36, v38
; GISEL-NEXT: v_or_b32_e32 v1, v37, v39
; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v50, v2, vcc
-; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v51, v27, vcc
+; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v51, v55, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v22
; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
@@ -2384,13 +2392,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_and_b32_e32 v1, v0, v35
; GISEL-NEXT: v_and_b32_e32 v25, v0, v34
; GISEL-NEXT: v_and_b32_e32 v26, v0, v4
-; GISEL-NEXT: v_and_b32_e32 v52, v0, v5
+; GISEL-NEXT: v_and_b32_e32 v27, v0, v5
; GISEL-NEXT: v_sub_i32_e32 v24, vcc, v3, v1
; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v53, v25, vcc
; GISEL-NEXT: v_mov_b32_e32 v0, v22
; GISEL-NEXT: v_mov_b32_e32 v1, v23
; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v2, v26, vcc
-; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v52, vcc
+; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v55, v27, vcc
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GISEL-NEXT: s_cbranch_execnz .LBB2_9
; GISEL-NEXT: ; %bb.10: ; %Flow
@@ -2455,6 +2463,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v28, vcc
; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v33, s[8:9]
; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v8, v33, vcc
+; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
%shl = srem <2 x i128> %lhs, %rhs
ret <2 x i128> %shl
@@ -3025,6 +3036,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v18, v39, vcc
; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v48, vcc
; GISEL-NEXT: v_or_b32_e32 v20, v20, v38
+; GISEL-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
; GISEL-NEXT: v_mov_b32_e32 v16, v24
; GISEL-NEXT: v_mov_b32_e32 v17, v25
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
@@ -3167,16 +3179,17 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_lshl_b64 v[18:19], v[24:25], 1
; GISEL-NEXT: v_lshrrev_b32_e32 v26, 31, v25
; GISEL-NEXT: v_lshl_b64 v[50:51], v[28:29], 1
-; GISEL-NEXT: v_lshl_b64 v[30:31], v[30:31], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v28, 31, v29
-; GISEL-NEXT: v_lshrrev_b32_e32 v29, 31, v23
+; GISEL-NEXT: v_lshl_b64 v[52:53], v[30:31], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v54, 31, v29
+; GISEL-NEXT: v_lshrrev_b32_e32 v55, 31, v23
; GISEL-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
+; GISEL-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31
; GISEL-NEXT: v_add_i32_e32 v34, vcc, -1, v34
; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc
; GISEL-NEXT: v_or_b32_e32 v24, v16, v18
; GISEL-NEXT: v_or_b32_e32 v25, v17, v19
-; GISEL-NEXT: v_or_b32_e32 v18, v30, v28
-; GISEL-NEXT: v_or_b32_e32 v19, v50, v29
+; GISEL-NEXT: v_or_b32_e32 v18, v52, v54
+; GISEL-NEXT: v_or_b32_e32 v19, v50, v55
; GISEL-NEXT: v_or_b32_e32 v22, v22, v26
; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v36, vcc
; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc
@@ -3185,7 +3198,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_or_b32_e32 v16, v34, v36
; GISEL-NEXT: v_or_b32_e32 v17, v35, v37
; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v48, v18, vcc
-; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v49, v31, vcc
+; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v49, v53, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_ashrrev_i32_e32 v16, 31, v26
; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
@@ -3193,13 +3206,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_and_b32_e32 v17, v16, v12
; GISEL-NEXT: v_and_b32_e32 v29, v16, v13
; GISEL-NEXT: v_and_b32_e32 v30, v16, v14
-; GISEL-NEXT: v_and_b32_e32 v50, v16, v15
+; GISEL-NEXT: v_and_b32_e32 v31, v16, v15
; GISEL-NEXT: v_sub_i32_e32 v28, vcc, v19, v17
; GISEL-NEXT: v_subb_u32_e32 v29, vcc, v51, v29, vcc
; GISEL-NEXT: v_mov_b32_e32 v16, v26
; GISEL-NEXT: v_mov_b32_e32 v17, v27
; GISEL-NEXT: v_subb_u32_e32 v30, vcc, v18, v30, vcc
-; GISEL-NEXT: v_subb_u32_e32 v31, vcc, v31, v50, vcc
+; GISEL-NEXT: v_subb_u32_e32 v31, vcc, v53, v31, vcc
; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GISEL-NEXT: s_cbranch_execnz .LBB3_9
; GISEL-NEXT: ; %bb.10: ; %Flow
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
index 9c59b4236cae4..628f1e18b33e1 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
@@ -562,11 +562,12 @@ define i32 @divergent_vec_i16_HH(i32 %a, i32 %b) {
;
; GFX11-TRUE16-LABEL: divergent_vec_i16_HH:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: divergent_vec_i16_HH:
diff --git a/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir b/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir
index 186b171f4e805..588d3e0378502 100644
--- a/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir
@@ -30,10 +30,11 @@ body: |
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub5:sgpr_256 = COPY [[COPY4]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub6:sgpr_256 = COPY [[COPY3]]
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub7:sgpr_256 = COPY [[COPY2]]
- ; GFX90A-NEXT: undef [[COPY11:%[0-9]+]].sub0:vreg_64_align2 = COPY [[COPY]]
- ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a [[COPY12]], [[COPY11]].sub0, [[COPY10]], 1, -1, 1, 0, 0, 0, implicit $exec, implicit [[COPY11]] :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX90A-NEXT: $vgpr0 = COPY [[COPY12]]
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX90A-NEXT: [[DEF:%[0-9]+]].sub0:vreg_64_align2 = COPY [[COPY]]
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a [[COPY11]], [[DEF]].sub0, [[COPY10]], 1, -1, 1, 0, 0, 0, implicit $exec, implicit [[DEF]] :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[COPY11]]
; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%9:vgpr_32 = COPY $vgpr1
%8:vgpr_32 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
index 6a898fa799f3e..5c691c96de75d 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
@@ -52,11 +52,22 @@ define bfloat @v_copysign_bf16_bf16(bfloat %mag, bfloat %sign) {
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_copysign_bf16_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_copysign_bf16_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v3, v2
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_copysign_bf16_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
ret bfloat %op
}
@@ -108,10 +119,13 @@ define bfloat @v_copysign_bf16_s_bf16(bfloat %mag, bfloat inreg %sign) {
;
; GFX11TRUE16-LABEL: v_copysign_bf16_s_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v2, v1
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_bf16_s_bf16:
@@ -170,10 +184,13 @@ define bfloat @v_copysign_s_bf16_bf16(bfloat inreg %mag, bfloat %sign) {
;
; GFX11TRUE16-LABEL: v_copysign_s_bf16_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v1, v0
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v2, v1
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_s_bf16_bf16:
@@ -233,10 +250,11 @@ define bfloat @v_copysign_bf16_f32(bfloat %mag, float %sign.f32) {
;
; GFX11TRUE16-LABEL: v_copysign_bf16_f32:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v2, v1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -300,10 +318,11 @@ define bfloat @v_copysign_bf16_f64(bfloat %mag, double %sign.f64) {
;
; GFX11TRUE16-LABEL: v_copysign_bf16_f64:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v1, v2
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -362,11 +381,22 @@ define bfloat @v_copysign_bf16_f16(bfloat %mag, half %sign.f16) {
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_copysign_bf16_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_copysign_bf16_f16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v3, v2
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_copysign_bf16_f16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%sign = bitcast half %sign.f16 to bfloat
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
ret bfloat %op
@@ -423,6 +453,8 @@ define amdgpu_ps i32 @s_copysign_bf16_bf16(bfloat inreg %mag, bfloat inreg %sign
;
; GFX11TRUE16-LABEL: s_copysign_bf16_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -499,6 +531,7 @@ define amdgpu_ps i32 @s_copysign_bf16_f32(bfloat inreg %mag, float inreg %sign.f
;
; GFX11TRUE16-LABEL: s_copysign_bf16_f32:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
@@ -575,6 +608,7 @@ define amdgpu_ps i32 @s_copysign_bf16_f64(bfloat inreg %mag, double inreg %sign.
;
; GFX11TRUE16-LABEL: s_copysign_bf16_f64:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
@@ -651,6 +685,8 @@ define amdgpu_ps i32 @s_copysign_bf16_f16(bfloat inreg %mag, half inreg %sign.f1
;
; GFX11TRUE16-LABEL: s_copysign_bf16_f16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -719,10 +755,11 @@ define float @v_copysign_f32_bf16(float %mag, bfloat %sign.bf16) {
;
; GFX11TRUE16-LABEL: v_copysign_f32_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v2
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_f32_bf16:
@@ -783,6 +820,7 @@ define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.b
;
; GFX11TRUE16-LABEL: s_copysign_f32_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s0, v0
@@ -850,11 +888,22 @@ define half @v_copysign_f16_bf16(half %mag, bfloat %sign.bf16) {
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_copysign_f16_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_copysign_f16_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v3, v2
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_copysign_f16_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%sign = bitcast bfloat %sign.bf16 to half
%op = call half @llvm.copysign.f16(half %mag, half %sign)
ret half %op
@@ -917,6 +966,8 @@ define amdgpu_ps i32 @s_copysign_f16_bf16(half inreg %mag, bfloat inreg %sign.bf
;
; GFX11TRUE16-LABEL: s_copysign_f16_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -985,10 +1036,11 @@ define double @v_copysign_f64_bf16(double %mag, bfloat %sign.bf16) {
;
; GFX11TRUE16-LABEL: v_copysign_f64_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v2.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v2
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_f64_bf16:
@@ -1049,6 +1101,7 @@ define amdgpu_ps <2 x i32> @s_copysign_f64_bf16(double inreg %mag, bfloat inreg
;
; GFX11TRUE16-LABEL: s_copysign_f64_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s1, v0
@@ -3545,6 +3598,7 @@ define amdgpu_ps i32 @s_copysign_out_f32_mag_f32_sign_bf16(float inreg %mag, bfl
;
; GFX11TRUE16-LABEL: s_copysign_out_f32_mag_f32_sign_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s0, v0
@@ -3610,6 +3664,7 @@ define amdgpu_ps <2 x i32> @s_copysign_out_f64_mag_f64_sign_bf16(double inreg %m
;
; GFX11TRUE16-LABEL: s_copysign_out_f64_mag_f64_sign_bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s1, v0
@@ -3677,6 +3732,7 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f32(bfloat inreg %mag, f
;
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f32:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
@@ -3744,6 +3800,7 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f64(bfloat inreg %mag, d
;
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f64:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
@@ -3840,14 +3897,16 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_f32_sign_bf16(float inreg %mag, bf
; GFX11TRUE16-NEXT: s_add_i32 s2, s2, s0
; GFX11TRUE16-NEXT: s_bitset1_b32 s0, 22
; GFX11TRUE16-NEXT: s_addk_i32 s2, 0x7fff
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: s_and_b32 s3, s3, exec_lo
; GFX11TRUE16-NEXT: s_cselect_b32 s0, s0, s2
-; GFX11TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
; GFX11TRUE16-NEXT: s_lshr_b32 s0, s0, 16
+; GFX11TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
@@ -3997,10 +4056,11 @@ define <2 x float> @v_copysign_out_v2f32_mag_v2f32_sign_v2bf16(<2 x float> %mag,
;
; GFX11TRUE16-LABEL: v_copysign_out_v2f32_mag_v2f32_sign_v2bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v2
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v3
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -4071,10 +4131,11 @@ define <2 x double> @v_copysign_out_v2f64_mag_v2f64_sign_v2bf16(<2 x double> %ma
;
; GFX11TRUE16-LABEL: v_copysign_out_v2f64_mag_v2f64_sign_v2bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v4
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v5
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -4864,14 +4925,16 @@ define amdgpu_ps <2 x i32> @s_copysign_out_v2f32_mag_v2f32_sign_v2bf16(<2 x floa
; GFX11TRUE16-LABEL: s_copysign_out_v2f32_mag_v2f32_sign_v2bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s2
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s3
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s0, v0
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, s1, v1
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, s0, v1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s1, v0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
; GFX11FAKE16-LABEL: s_copysign_out_v2f32_mag_v2f32_sign_v2bf16:
@@ -4961,6 +5024,8 @@ define amdgpu_ps <4 x i32> @s_copysign_out_v2f64_mag_v2f64_sign_v2bf16(<2 x doub
; GFX11TRUE16-LABEL: s_copysign_out_v2f64_mag_v2f64_sign_v2bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_lshr_b32 s5, s4, 16
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s5
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -5762,13 +5827,15 @@ define <3 x float> @v_copysign_out_v3f32_mag_v3f32_sign_v3bf16(<3 x float> %mag,
;
; GFX11TRUE16-LABEL: v_copysign_out_v3f32_mag_v3f32_sign_v3bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr6
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, v3.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v4
-; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v5
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.h, v3.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.h, v4.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v5
+; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v6
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_out_v3f32_mag_v3f32_sign_v3bf16:
@@ -5848,13 +5915,15 @@ define <3 x double> @v_copysign_out_v3f64_mag_v3f64_sign_v3bf16(<3 x double> %ma
;
; GFX11TRUE16-LABEL: v_copysign_out_v3f64_mag_v3f64_sign_v3bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr9
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.h, v6.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v6
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v7
-; GFX11TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v8
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.h, v6.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.h, v7.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v8
+; GFX11TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v9
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_out_v3f64_mag_v3f64_sign_v3bf16:
@@ -6024,15 +6093,16 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3f32_sign_v3bf16(<3 x float> %ma
; GFX11TRUE16-NEXT: v_add3_u32 v8, v8, v1, 0x7fff
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v5, v9, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v8, v10, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff7fff, v1, v3
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v2, v4
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v5, v4
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_out_v3bf16_mag_v3f32_sign_v3bf16:
@@ -6343,11 +6413,12 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3f64_sign_v3bf16(<3 x double> %m
; GFX11TRUE16-NEXT: v_add3_u32 v10, v14, v10, 0x7fff
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v8, v15, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f64_e32 vcc_lo, v[4:5], v[4:5]
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v9, v13, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v1, v7
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v4, v7
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v10, v11, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff7fff, v2, v6
@@ -6566,14 +6637,15 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f32(<3 x bfloat> %m
; GFX11TRUE16-NEXT: v_add3_u32 v8, v8, v3, 0x7fff
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v5, v9, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v6, v7, vcc_lo
; GFX11TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.h
; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v8, v10, vcc_lo
; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v1, v2
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff7fff, v1, v5
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff7fff, v0, v3
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -6699,15 +6771,18 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64(<3 x bfloat> %m
;
; GFX11TRUE16-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
+; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fff0000, v4, v3
; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -6905,12 +6980,14 @@ define <4 x float> @v_copysign_out_v4f32_mag_v4f32_sign_v4bf16(<4 x float> %mag,
;
; GFX11TRUE16-LABEL: v_copysign_out_v4f32_mag_v4f32_sign_v4bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr6
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr7
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4
; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.h, v4.l
; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.h, v5.l
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4
; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v5
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v6
; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v7
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -7007,12 +7084,14 @@ define <4 x double> @v_copysign_out_v4f64_mag_v4f64_sign_v4bf16(<4 x double> %ma
;
; GFX11TRUE16-LABEL: v_copysign_out_v4f64_mag_v4f64_sign_v4bf16:
; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX11TRUE16-NEXT: ; implicit-def: $vgpr11
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v8
; GFX11TRUE16-NEXT: v_mov_b16_e32 v10.h, v8.l
; GFX11TRUE16-NEXT: v_mov_b16_e32 v11.h, v9.l
-; GFX11TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v8
; GFX11TRUE16-NEXT: v_bfi_b32 v7, 0x7fffffff, v7, v9
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v10
; GFX11TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v11
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index 574c1042859aa..5d42165b8ae81 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -45,6 +45,8 @@ define amdgpu_ps i16 @s_copysign_f16(half inreg %mag, half inreg %sign) {
;
; GFX11-TRUE16-LABEL: s_copysign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -385,11 +387,22 @@ define half @v_copysign_f16(half %mag, half %sign) {
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_copysign_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_copysign_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v3, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_copysign_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.copysign.f16(half %mag, half %sign)
ret half %result
}
@@ -688,10 +701,11 @@ define float @v_copysign_out_f32_mag_f32_sign_f16(float %mag, half %sign) {
;
; GFX11-TRUE16-LABEL: v_copysign_out_f32_mag_f32_sign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v2
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_f32_mag_f32_sign_f16:
@@ -732,10 +746,11 @@ define double @v_copysign_out_f64_mag_f64_sign_f16(double %mag, half %sign) {
;
; GFX11-TRUE16-LABEL: v_copysign_out_f64_mag_f64_sign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v2.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v2
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_f64_mag_f64_sign_f16:
@@ -778,10 +793,11 @@ define half @v_copysign_out_f16_mag_f16_sign_f32(half %mag, float %sign) {
;
; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v2, v1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -825,10 +841,11 @@ define half @v_copysign_out_f16_mag_f16_sign_f64(half %mag, double %sign) {
;
; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v1, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,10 +889,12 @@ define half @v_copysign_out_f16_mag_f32_sign_f16(float %mag, half %sign) {
;
; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f32_sign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v2
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f32_sign_f16:
@@ -1070,13 +1089,14 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, 0x7e00
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, 0x7c00, v5 :: v_dual_add_nc_u32 v3, v3, v4
; GFX11-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v4
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f64_sign_f16:
@@ -1294,11 +1314,13 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffe
; GFX11-TRUE16-NEXT: v_med3_i32 v1, s3, 0, 13
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s3
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_or_b32 s3, s1, 0x1000
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, s4
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s5, s4
@@ -2732,6 +2754,7 @@ define amdgpu_ps i32 @s_copysign_out_f32_mag_f32_sign_f16(float inreg %mag, half
;
; GFX11-TRUE16-LABEL: s_copysign_out_f32_mag_f32_sign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s0, v0
@@ -2781,6 +2804,7 @@ define amdgpu_ps <2 x i32> @s_copysign_out_f64_mag_f64_sign_f16(double inreg %ma
;
; GFX11-TRUE16-LABEL: s_copysign_out_f64_mag_f64_sign_f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s1, v0
@@ -2832,6 +2856,7 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f32(half inreg %mag, float
;
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f32:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
@@ -2883,6 +2908,7 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f64(half inreg %mag, doubl
;
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f64:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
@@ -2934,10 +2960,11 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f32_sign_f16(float inreg %mag, half
;
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f32_sign_f16:
; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s0
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, s0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v1, v0
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -3050,10 +3077,11 @@ define <2 x float> @v_copysign_out_v2f32_mag_v2f32_sign_v2f16(<2 x float> %mag,
;
; GFX11-TRUE16-LABEL: v_copysign_out_v2f32_mag_v2f32_sign_v2f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v3
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -3106,10 +3134,11 @@ define <2 x double> @v_copysign_out_v2f64_mag_v2f64_sign_v2f16(<2 x double> %mag
;
; GFX11-TRUE16-LABEL: v_copysign_out_v2f64_mag_v2f64_sign_v2f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v5
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -3858,14 +3887,16 @@ define amdgpu_ps <2 x i32> @s_copysign_out_v2f32_mag_v2f32_sign_v2f16(<2 x float
; GFX11-TRUE16-LABEL: s_copysign_out_v2f32_mag_v2f32_sign_v2f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s2, 16
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s0, v0
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, s1, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, s0, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, s1, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v0
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
; GFX11-FAKE16-LABEL: s_copysign_out_v2f32_mag_v2f32_sign_v2f16:
@@ -3931,6 +3962,8 @@ define amdgpu_ps <4 x i32> @s_copysign_out_v2f64_mag_v2f64_sign_v2f16(<2 x doubl
; GFX11-TRUE16-LABEL: s_copysign_out_v2f64_mag_v2f64_sign_v2f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s4, 16
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -4670,13 +4703,15 @@ define <3 x float> @v_copysign_out_v3f32_mag_v3f32_sign_v3f16(<3 x float> %mag,
;
; GFX11-TRUE16-LABEL: v_copysign_out_v3f32_mag_v3f32_sign_v3f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v3.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v4.l
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v3
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v4
-; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v3.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v4.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v5
+; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v6
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_v3f32_mag_v3f32_sign_v3f16:
@@ -4735,13 +4770,15 @@ define <3 x double> @v_copysign_out_v3f64_mag_v3f64_sign_v3f16(<3 x double> %mag
;
; GFX11-TRUE16-LABEL: v_copysign_out_v3f64_mag_v3f64_sign_v3f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v6.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v6
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v7
-; GFX11-TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v8
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v6.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v7.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v8
+; GFX11-TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v9
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_v3f64_mag_v3f64_sign_v3f16:
@@ -5589,15 +5626,18 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f16_sign_v3f64(<3 x half> %mag, <3
;
; GFX11-TRUE16-LABEL: v_copysign_out_v3f16_mag_v3f16_sign_v3f64:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
+; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fff0000, v4, v3
; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -5757,12 +5797,14 @@ define <4 x float> @v_copysign_out_v4f32_mag_v4f32_sign_v4f16(<4 x float> %mag,
;
; GFX11-TRUE16-LABEL: v_copysign_out_v4f32_mag_v4f32_sign_v4f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v4.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v5.l
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v4
; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v5
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v0, v6
; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fffffff, v2, v7
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -5834,12 +5876,14 @@ define <4 x double> @v_copysign_out_v4f64_mag_v4f64_sign_v4f16(<4 x double> %mag
;
; GFX11-TRUE16-LABEL: v_copysign_out_v4f64_mag_v4f64_sign_v4f16:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v9.l
-; GFX11-TRUE16-NEXT: v_bfi_b32 v3, 0x7fffffff, v3, v8
; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fffffff, v7, v9
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v1, v10
; GFX11-TRUE16-NEXT: v_bfi_b32 v5, 0x7fffffff, v5, v11
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
index 0a2e758f7cf21..fc4de0645d217 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -999,6 +999,7 @@ define amdgpu_kernel void @s_test_copysign_f32_fpext_f16(ptr addrspace(1) %out,
; GFX11-LABEL: s_test_copysign_f32_fpext_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: ; implicit-def: $vgpr0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b16_e32 v0.h, s3
@@ -1094,6 +1095,7 @@ define amdgpu_kernel void @s_test_copysign_f32_fpext_bf16(ptr addrspace(1) %out,
; GFX11-LABEL: s_test_copysign_f32_fpext_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: ; implicit-def: $vgpr0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b16_e32 v0.h, s3
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
index 8b5c34d97e50e..8a9a8c3f121d3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -372,6 +372,7 @@ define amdgpu_kernel void @s_test_copysign_f64_f16(ptr addrspace(1) %out, [8 x i
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x74
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX11-NEXT: ; implicit-def: $vgpr0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b16_e32 v0.h, s6
@@ -971,10 +972,11 @@ define double @v_test_copysign_f64_f16(ptr addrspace(1) %out, [8 x i32], double
;
; GFX11-LABEL: v_test_copysign_f64_f16:
; GFX11: ; %bb.0:
+; GFX11-NEXT: ; implicit-def: $vgpr1
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mov_b16_e32 v1.h, v20.l
; GFX11-NEXT: v_mov_b32_e32 v0, v10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_mov_b16_e32 v1.h, v20.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sign.ext = fpext half %sign to double
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index e7af7467171c3..a51790693ff16 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -98,19 +98,21 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, v1, v5, s[6:7]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v10, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
-; SDAG-NEXT: v_mad_i32_i24 v3, v9, v5, v3
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v8, v[2:3]
+; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v7, v[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v4, v3
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v7, v[4:5]
+; SDAG-NEXT: v_mad_i32_i24 v3, v9, v6, v3
; SDAG-NEXT: .LBB0_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
; SDAG-NEXT: .LBB0_7: ; %Flow2
@@ -464,19 +466,21 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, v1, v5, s[6:7]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v10, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v10, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
-; SDAG-NEXT: v_mad_i32_i24 v3, v9, v5, v3
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v8, v[2:3]
+; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v7, v[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v4, v3
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v7, v[4:5]
+; SDAG-NEXT: v_mad_i32_i24 v3, v9, v6, v3
; SDAG-NEXT: .LBB1_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
; SDAG-NEXT: .LBB1_7: ; %Flow2
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index c4a38dcd7b5f3..6b9e1febd6606 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -538,8 +538,9 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v3, v2
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB0_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v3, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v5, v4
@@ -622,6 +623,7 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
@@ -770,8 +772,9 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s1, s0
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB0_2
; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s1, s0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -858,6 +861,7 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -1015,8 +1019,9 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s1, s0
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB0_2
; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s1, s0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -1106,6 +1111,7 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -5795,8 +5801,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v5, v2
@@ -5879,6 +5886,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
@@ -5893,8 +5901,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_10
; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v8, v7
@@ -5977,6 +5986,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v5, v6, v5
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v5.l, v5
@@ -6233,8 +6243,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2
; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s6, s5
; GFX1150-TRUE16-NEXT: s_cselect_b32 s7, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6321,6 +6332,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s4
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6336,8 +6348,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10
; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s7
; GFX1150-TRUE16-NEXT: s_cselect_b32 s9, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6424,6 +6437,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s6
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6703,8 +6717,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2
; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s6, s5
; GFX1200-TRUE16-NEXT: s_cselect_b32 s7, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6794,6 +6809,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s4
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6812,8 +6828,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10
; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s7
; GFX1200-TRUE16-NEXT: s_cselect_b32 s9, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6904,6 +6921,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s6
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -8976,8 +8994,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v7, v4
@@ -9060,6 +9079,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v4, v5, v4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v4.l, v4
@@ -9074,8 +9094,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_10
; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v10, v7
@@ -9158,6 +9179,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v7, v8, v7
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v7.l, v7
@@ -9169,8 +9191,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_18
; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else53
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v8, 0x7fff, v11, v8
@@ -9253,6 +9276,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v8, v9, v8
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v8.l, v8
@@ -9267,8 +9291,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_26
; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else86
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, 0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_bfi_b32 v11, 0x7fff, v14, v11
@@ -9351,6 +9376,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc_lo
; GFX11-TRUE16-NEXT: v_ldexp_f32 v11, v12, v11
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v9.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v11.l, v11
@@ -9817,8 +9843,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2
; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s6
; GFX1150-TRUE16-NEXT: s_cselect_b32 s9, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -9905,6 +9932,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s5
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -9920,8 +9948,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10
; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cselect_b32 s11, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -10008,6 +10037,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s8
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -10021,8 +10051,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18
; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else53
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cselect_b32 s11, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -10109,6 +10140,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s7
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -10124,8 +10156,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26
; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else86
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s12, s11
; GFX1150-TRUE16-NEXT: s_cselect_b32 s13, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -10212,6 +10245,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc_lo
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v4, v3
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v4.l, s10
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v3.l, v3
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -10725,8 +10759,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2
; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s6
; GFX1200-TRUE16-NEXT: s_cselect_b32 s9, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -10816,6 +10851,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s5
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -10834,8 +10870,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10
; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cselect_b32 s11, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -10926,6 +10963,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s8
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -10941,8 +10979,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18
; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else53
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cselect_b32 s11, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
@@ -11034,6 +11073,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s7
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -11052,8 +11092,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26
; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else86
-; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
+; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s12, s11
; GFX1200-TRUE16-NEXT: s_cselect_b32 s13, -1, 0
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -11144,6 +11185,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc_lo
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v4, v3
+; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v4.l, s10
; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v3.l, v3
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index f67ab18dd8ef1..5a4cc8892a422 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -17424,32 +17424,62 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: test_call_external_void_func_v1bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, s33
-; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_or_saveexec_b32 s1, -1
-; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
-; GFX11-NEXT: v_writelane_b32 v40, s0, 2
-; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi
-; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo
-; GFX11-NEXT: s_add_i32 s32, s32, 16
-; GFX11-NEXT: v_writelane_b32 v40, s30, 0
-; GFX11-NEXT: v_writelane_b32 v40, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s31, v40, 1
-; GFX11-NEXT: v_readlane_b32 s30, v40, 0
-; GFX11-NEXT: s_mov_b32 s32, s33
-; GFX11-NEXT: v_readlane_b32 s0, v40, 2
-; GFX11-NEXT: s_or_saveexec_b32 s1, -1
-; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
-; GFX11-NEXT: s_mov_b32 s33, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: test_call_external_void_func_v1bf16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
+; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
+; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo
+; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
+; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1
+; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
+; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
+; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
+; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
+; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: test_call_external_void_func_v1bf16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
+; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
+; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo
+; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
+; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
+; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
+; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
+; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
+; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
+; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v1bf16:
; GFX10-SCRATCH: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
index 492bd1b508bc6..2f6e237b693d9 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
@@ -536,28 +536,29 @@ define <3 x bfloat> @v_uitofp_v3i1_to_v3bf16(<3 x i1> %num) {
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v1.l
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
-; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v2, 16, 1
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v0, 16, 1
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_add3_u32 v1, v1, v2, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v0, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
+; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v1, v7, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
-; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -614,31 +615,31 @@ define <3 x bfloat> @v_uitofp_v3i1_to_v3bf16(<3 x i1> %num) {
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
-; GFX12-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX12-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v1.l
-; GFX12-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
-; GFX12-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-TRUE16-NEXT: v_bfe_u32 v1, v2, 16, 1
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_bfe_u32 v4, v0, 16, 1
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, vcc_lo
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, vcc_lo
+; GFX12-TRUE16-NEXT: v_add3_u32 v1, v1, v2, 0x7fff
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
; GFX12-TRUE16-NEXT: v_add3_u32 v4, v4, v0, 0x7fff
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
+; GFX12-TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
+; GFX12-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v2, v1, v7, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX12-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
-; GFX12-TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
-; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
@@ -1862,28 +1863,29 @@ define <3 x bfloat> @v_sitofp_v3i1_to_v3bf16(<3 x i1> %num) {
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, -1.0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v1.l
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
-; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v2, 16, 1
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v0, 16, 1
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1.0, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, -1.0, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_add3_u32 v1, v1, v2, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v0, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
+; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v1, v7, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
-; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -1940,31 +1942,31 @@ define <3 x bfloat> @v_sitofp_v3i1_to_v3bf16(<3 x i1> %num) {
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, -1.0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
-; GFX12-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX12-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v1.l
-; GFX12-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
-; GFX12-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-TRUE16-NEXT: v_bfe_u32 v1, v2, 16, 1
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_bfe_u32 v4, v0, 16, 1
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1.0, vcc_lo
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, -1.0, vcc_lo
+; GFX12-TRUE16-NEXT: v_add3_u32 v1, v1, v2, 0x7fff
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
; GFX12-TRUE16-NEXT: v_add3_u32 v4, v4, v0, 0x7fff
-; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
+; GFX12-TRUE16-NEXT: v_bfe_u32 v6, v3, 16, 1
+; GFX12-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v3
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v2, v1, v7, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX12-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v1
-; GFX12-TRUE16-NEXT: v_add3_u32 v6, v6, v1, 0x7fff
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_add3_u32 v6, v6, v3, 0x7fff
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
-; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
-; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.h
; GFX12-TRUE16-NEXT: s_wait_alu 0xfffd
; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.h
diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll
index ab38bd21994ec..35eed55eae194 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll
@@ -1161,6 +1161,8 @@ define amdgpu_kernel void @idot4_acc16_vecMul(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DL-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-DL-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -1169,28 +1171,28 @@ define amdgpu_kernel void @idot4_acc16_vecMul(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3]
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v3, s[4:5]
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8
-; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v4.h, 8, v1.l
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
-; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v2.l
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v6.h, 8, v1.l
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h
+; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v9.h, 8, v2.l
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.h
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v9.l, v7.l
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v1.h, 8, v1.h
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h
-; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v4, v4, v7
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l
+; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v6, v6, v9
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v0.l
+; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v6.l, v0.l
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v2
-; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v4.h
+; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v6.h
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.h
@@ -3424,6 +3426,7 @@ define amdgpu_kernel void @idot4_nonstandard_signed(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16: ; %bb.0: ; %entry
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-DL-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -3432,25 +3435,25 @@ define amdgpu_kernel void @idot4_nonstandard_signed(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1]
; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3]
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v2
+; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v2
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v3.l
-; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v3
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v2.h
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.h
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.h
; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v1.l, v0.l
-; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v5.l
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
-; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 24, v2
+; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l
+; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 24, v2
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.h, v2.l, v0.l
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll
index 305461ed6b208..09487900587f6 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4u.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll
@@ -1963,6 +1963,7 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DL-TRUE16-NEXT: ; implicit-def: $vgpr6
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -1974,23 +1975,23 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1,
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v4, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.h
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
-; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v6, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
+; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 24, v3
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h
-; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
+; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l
-; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
+; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v7, 0, 8
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 792d7db26d076..51802beed4368 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -850,15 +850,16 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out,
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x10
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v2, 16, s4
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s4
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v2
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 31b6b533866d4..f49ddcdf345c5 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -5778,25 +5778,31 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
; GFX7-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
-; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v6
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[4:5]
+; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, v0, v6
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v2, 0
; GFX7-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v6
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v3, v[7:8]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
-; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v7, v[0:1]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v8, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v2, vcc
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v9, v[5:6]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v8, v[1:2]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i64:
@@ -5834,25 +5840,31 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
-; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[4:5]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v0, v6
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v2, 0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v6
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v3, v[7:8]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
-; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v5
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 1, v5
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v7, v[0:1]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v8, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v2, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v9, v[5:6]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v8, v[1:2]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: clpeak_imad_pat_i64:
@@ -5886,25 +5898,31 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, v0, v6
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[4:5]
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v1, vcc, v0, v6
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v1, v2, 0
; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, v4, v7, vcc
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v3, v[7:8]
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v4, vcc
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v7, vcc, 1, v5
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, 1, v5
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v7, v[0:1]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v8, 0
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v9, v[5:6]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v8, v[1:2]
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-SDAG-LABEL: clpeak_imad_pat_i64:
@@ -5991,26 +6009,32 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v0, 1
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v7, null, 0, v1, vcc_lo
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v3, v[1:2]
-; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, v6
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX10-GISEL-NEXT: v_add_co_u32 v1, vcc_lo, v0, v6
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v3, v[4:5]
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v7, v2, v[4:5]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v2, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v1, v2, 0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v9, null, v4, v7, vcc_lo
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v6
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v3, v[1:2]
-; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v10, null, 0, v4, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v5, v8, 0
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX10-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v0, 1
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v4, vcc_lo
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v1, v3, v[7:8]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v5, v10, 0
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v9, v2, v[6:7]
-; GFX10-GISEL-NEXT: v_add_co_u32 v7, vcc_lo, v5, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v4
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v7, 0
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v9, null, 0, v6, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v5, v10, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v3, v9, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v8, v[4:5]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v4, v7, v[1:2]
+; GFX10-GISEL-NEXT: v_add_co_u32 v9, vcc_lo, v5, 1
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v9, 0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v12, null, 0, v6, vcc_lo
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v5, v11, v[7:8]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v6, v10, v[4:5]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v3, v12, v[7:8]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v1, v9, v[2:3]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: clpeak_imad_pat_i64:
@@ -6049,37 +6073,45 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
; GFX11-GISEL-LABEL: clpeak_imad_pat_i64:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_co_u32 v7, vcc_lo, v0, 1
+; GFX11-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v8, null, 0, v1, vcc_lo
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v7, v3, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v2, v[4:5]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v7
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v4, v2, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v10, null, v5, v8, vcc_lo
-; GFX11-GISEL-NEXT: v_add_co_u32 v11, vcc_lo, v0, 1
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v12, null, 0, v5, vcc_lo
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v7
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v4, v3, v[1:2]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v6, v11, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v10, v2, v[7:8]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v4
-; GFX11-GISEL-NEXT: v_add_co_u32 v9, vcc_lo, v6, 1
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v12, v[2:3]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v9, 0
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v10, null, 0, v8, vcc_lo
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v8, v11, v[4:5]
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v9, null, 0, v1, vcc_lo
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v8, v2, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX11-GISEL-NEXT: v_add_co_u32 v1, vcc_lo, v0, v8
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v3, v10, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v5, v9, v[6:7]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v3, v[4:5]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v9, v2, v[6:7]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v1, v2, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v11, null, v4, v9, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GFX11-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v0, 1
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v13, null, 0, v4, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v1, v3, v[7:8]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v5, v12, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v11, v2, v[9:10]
+; GFX11-GISEL-NEXT: v_add_co_u32 v11, vcc_lo, v5, 1
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v11, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v7, v4
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v6, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v5, v13, v[7:8]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v6, v12, v[9:10]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v3, v2, v[4:5]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v7, v11, v[8:9]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: clpeak_imad_pat_i64:
@@ -6410,50 +6442,60 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v0
; GFX7-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
-; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v2
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v2
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v14, v6, 0
; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v5, v[2:3]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, v9
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v14, v7, v[10:11]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v4, v[1:2]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v15, v6, v[9:10]
; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v12
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v8, v13, vcc
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
-; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, v2, v14
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v14
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v9, v15, vcc
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
-; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc
-; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v2
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v3, v4, 0
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v1, v13, vcc
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, v8, v14
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v5, v[11:12]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v17, v6, 0
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v2, v15, vcc
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[10:11]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v14, v13
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v7, v[14:15]
+; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[13:14]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v10, 0
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v6
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc
-; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v10
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v7, v[0:1]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v11, 0
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v9
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v10, v[0:1]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v3, vcc
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v7
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
-; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v13
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
-; GFX7-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v12
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[2:3]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v15, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX7-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v14, v[10:11]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, v3
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v16, v[10:11]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v13, v[4:5]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v15, v[10:11]
; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6515,50 +6557,60 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 1, v0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
-; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v2
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v2
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v14, v6, 0
; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v5, v[2:3]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v10, v9
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v14, v7, v[10:11]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v4, v[1:2]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v15, v6, v[9:10]
; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, v0, v12
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v8, v13, vcc
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
-; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, v2, v14
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v14
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v9, v15, vcc
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
-; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc
-; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 1, v2
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v3, v4, 0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, v1, v13, vcc
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, v8, v14
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v5, v[11:12]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v17, v6, 0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v2, v15, vcc
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[10:11]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, v13
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v7, v[14:15]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 1, v0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[13:14]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v10, 0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v8
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v6
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc
-; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v10
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v7, v[0:1]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v11, 0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 1, v9
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v10, v[0:1]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v7
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
-; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 1, v13
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 1, v12
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[2:3]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v15, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v10, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v14, v[10:11]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v10, v3
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v16, v[10:11]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v13, v[4:5]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v15, v[10:11]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6612,50 +6664,60 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v0
; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v2
; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v2
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v14, v6, 0
; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v5, v[2:3]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v10, v9
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v14, v7, v[10:11]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v13, v4, v[1:2]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v15, v6, v[9:10]
; GFX900-GISEL-NEXT: v_add_co_u32_e32 v3, vcc, v0, v12
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v16, vcc, v8, v13, vcc
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v17, vcc, v2, v14
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v14
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v18, vcc, v9, v15, vcc
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v11, vcc, 1, v0
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v8, vcc
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v2
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v3, v4, 0
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v16, vcc, v1, v13, vcc
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v17, vcc, v8, v14
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v5, v[11:12]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v17, v6, 0
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v18, vcc, v2, v15, vcc
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[10:11]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v14, v13
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v7, v[14:15]
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 1, v0
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[13:14]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v10, 0
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v11, vcc, 1, v8
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v9, vcc
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v10
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v7, v[0:1]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v11, 0
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v13, vcc, 1, v9
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v10, v[0:1]
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v3, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
-; GFX900-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, 1, v13
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
-; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
-; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0
+; GFX900-GISEL-NEXT: v_add_co_u32_e32 v15, vcc, 1, v12
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[2:3]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v15, 0
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v10, v1
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v14, v[10:11]
+; GFX900-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v10, v3
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v16, v[10:11]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v13, v[4:5]
+; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v15, v[10:11]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6811,44 +6873,56 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v12, v4, 0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v15, null, 0, v3, vcc_lo
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v14, v6, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v12, v5, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v14, v7, v[3:4]
-; GFX10-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v12
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr19_vgpr20
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, v1
+; GFX10-GISEL-NEXT: v_add_co_u32 v1, vcc_lo, v0, v12
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v10, v3
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v12, v5, v[8:9]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v14, v7, v[10:11]
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v13, v4, v[8:9]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v3, v4, 0
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v16, null, v10, v13, vcc_lo
-; GFX10-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v2, v14
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v1, v4, 0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v10, v13, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v21, vcc_lo, v2, v14
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v15, v6, v[9:10]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v17, v6, 0
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v18, null, v8, v15, vcc_lo
-; GFX10-GISEL-NEXT: v_add_co_u32 v19, vcc_lo, v0, 1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v12
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v14
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v20, null, 0, v10, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v11, v19, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v3, v5, v[0:1]
-; GFX10-GISEL-NEXT: v_add_co_u32 v15, vcc_lo, v2, 1
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v17, v7, v[1:2]
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v12, null, 0, v8, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v13, v15, 0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v10
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v16, v4, v[14:15]
-; GFX10-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v11, 1
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v18, v6, v[0:1]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, v8
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v11, v20, v[1:2]
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v16, null, 0, v4, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v13, v12, v[6:7]
-; GFX10-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v13, 1
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v9, v14, 0
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v18, null, 0, v5, vcc_lo
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v4, v19, v[10:11]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v7, v17, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v5, v15, v[11:12]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v9, v16, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v7, v18, v[3:4]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v12, v14, v[5:6]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v4, v17, v[7:8]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v21, v6, 0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v22, null, v8, v15, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v23, vcc_lo, v0, 1
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v17, v14
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v15, v12
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v24, null, 0, v10, vcc_lo
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v11, v23, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v1, v5, v[15:16]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v21, v7, v[17:18]
+; GFX10-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v2, 1
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v15, null, 0, v8, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v19, v10
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v13, v17, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v3, v4, v[0:1]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v22, v6, v[14:15]
+; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v11, 1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v11, v24, v[19:20]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v18, null, 0, v4, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v11, v8
+; GFX10-GISEL-NEXT: v_add_co_u32 v21, vcc_lo, v13, 1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v9, v6, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v13, v15, v[11:12]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v7, v21, 0
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v5, vcc_lo
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v4, v23, v[10:11]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v15, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v13, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v7, v22, v[15:16]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v5, v17, v[11:12]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v9, v18, v[13:14]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v12, v6, v[4:5]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v3, v21, v[7:8]
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6911,62 +6985,75 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i64:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_co_u32 v13, vcc_lo, v0, 1
+; GFX11-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v0, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v14, null, 0, v1, vcc_lo
-; GFX11-GISEL-NEXT: v_add_co_u32 v15, vcc_lo, v2, 1
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v13, v4, 0
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v16, null, 0, v3, vcc_lo
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v15, v6, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v13, v5, v[1:2]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v15, v7, v[3:4]
-; GFX11-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v13
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v14, v4, v[8:9]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v16, v6, v[9:10]
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v17, null, v10, v14, vcc_lo
-; GFX11-GISEL-NEXT: v_add_co_u32 v18, vcc_lo, v2, v15
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v3, v4, 0
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v19, null, v11, v16, vcc_lo
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v18, v6, 0
-; GFX11-GISEL-NEXT: v_add_co_u32 v20, vcc_lo, v0, 1
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v21, null, 0, v10, vcc_lo
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v9
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v8, v20, 0
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v13
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v3, v5, v[0:1]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v10
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v18, v7, v[1:2]
-; GFX11-GISEL-NEXT: v_add_co_u32 v18, vcc_lo, v2, 1
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v11, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v15, null, 0, v1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v16, vcc_lo, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v14, v4, 0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v17, null, 0, v3, vcc_lo
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[15:16], null, v17, v4, v[13:14]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v12, v18, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v19, v6, v[14:15]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v21, v[0:1]
-; GFX11-GISEL-NEXT: v_add_co_u32 v14, vcc_lo, v8, 1
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v16, null, 0, v15, vcc_lo
-; GFX11-GISEL-NEXT: v_add_co_u32 v17, vcc_lo, v12, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v12, v22, v[5:6]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v9, v14, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v4, v17, 0
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v19, null, 0, v10, vcc_lo
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v15, v20, v[6:7]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v10, v18, v[7:8]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v16, v6, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr19_vgpr20
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v8, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v10, v3
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v14, v5, v[8:9]
+; GFX11-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v14
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v16, v7, v[10:11]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v15, v4, v[12:13]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v17, v6, v[8:9]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v3, v4, 0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v21, null, v9, v15, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v22, vcc_lo, v2, v16
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr15_vgpr16
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v23, null, v10, v17, vcc_lo
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v15, v12
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v22, v6, 0
+; GFX11-GISEL-NEXT: v_add_co_u32 v24, vcc_lo, v0, 1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v5, v[15:16]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v25, null, 0, v9, vcc_lo
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v9, v16, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v4, v19, v[3:4]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v11, v14, v[7:8]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v12, v17, v[8:9]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v17, v14
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v11, v24, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v22, v7, v[17:18]
+; GFX11-GISEL-NEXT: v_add_co_u32 v18, vcc_lo, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[15:16], null, v21, v4, v[0:1]
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v12, null, 0, v10, vcc_lo
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v13, v18, 0
+; GFX11-GISEL-NEXT: v_add_co_u32 v21, vcc_lo, v11, 1
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v19, v9
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v23, v6, v[14:15]
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v15, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v23, vcc_lo, v13, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v11, v25, v[19:20]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v8, v21, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v4, v23, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v10, v5
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v9, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[16:17], null, v13, v12, v[10:11]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v15, v24, v[6:7]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v11, v3
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v9, v18, v[16:17]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v8, v22, v[5:6]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v4, v26, v[11:12]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v10, v21, v[14:15]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v13, v23, v[7:8]
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index b84fb520e0519..c1e4ff7f09a72 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -839,26 +839,51 @@ define float @test_sext_cvt_f32_fp8(i16 %a) {
; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: test_sext_cvt_f32_fp8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-TRUE16-LABEL: test_sext_cvt_f32_fp8:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX12-TRUE16-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_sext_cvt_f32_fp8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX12-FAKE16-LABEL: test_sext_cvt_f32_fp8:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
+; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: test_sext_cvt_f32_fp8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX1250-TRUE16-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: test_sext_cvt_f32_fp8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
ret float %ret
@@ -872,26 +897,51 @@ define float @test_sext_cvt_f32_bf8(i16 %a) {
; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: test_sext_cvt_f32_bf8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-TRUE16-LABEL: test_sext_cvt_f32_bf8:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX12-TRUE16-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_sext_cvt_f32_bf8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX12-FAKE16-LABEL: test_sext_cvt_f32_bf8:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
+; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: test_sext_cvt_f32_bf8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX1250-TRUE16-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: test_sext_cvt_f32_bf8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
ret float %ret
@@ -905,26 +955,51 @@ define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-TRUE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX12-TRUE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_sext_cvt_pk_f32_bf8_word1:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX12-FAKE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
+; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: test_sext_cvt_pk_f32_bf8_word1:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
ret <2 x float> %ret
@@ -938,26 +1013,51 @@ define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
; GFX9X-NEXT: s_setpc_b64 s[30:31]
;
-; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-TRUE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX12-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_sext_cvt_pk_f32_fp8_word0:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX12-FAKE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-TRUE16-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX1250-TRUE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: test_sext_cvt_pk_f32_fp8_word0:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%a.sext = sext i16 %a to i32
%ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)
ret <2 x float> %ret
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index d45705edce2c8..89133a690b856 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -98,9 +98,10 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_gather4 v[0:3], v[2:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -120,9 +121,11 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
+; GFX12-TRUE16-NEXT: image_gather4 v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -166,9 +169,10 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_gather4 v[0:3], v[2:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -188,9 +192,11 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX12-TRUE16-NEXT: image_gather4 v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -300,9 +306,10 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_gather4_cl v[0:3], v[2:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -322,9 +329,11 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_cl v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -370,9 +379,11 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -390,9 +401,11 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -436,9 +449,12 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_gather4_b v[0:3], v[2:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -457,8 +473,10 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_b v[0:3], [v2, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -503,8 +521,10 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_gather4_c_b v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -523,8 +543,10 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_c_b v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -570,9 +592,11 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_gather4_b_cl v[0:3], v[2:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
@@ -593,9 +617,13 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_b_cl v[0:3], [v2, v1, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -642,9 +670,13 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_gather4_c_b_cl v[0:3], [v3, v1, v2, v5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -662,9 +694,13 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_gather4_c_b_cl v[0:3], [v3, v1, v2, v5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -700,9 +736,10 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
;
; GFX11-TRUE16-LABEL: gather4_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: image_gather4_l v[0:3], v[2:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
@@ -716,8 +753,10 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
;
; GFX12-TRUE16-LABEL: gather4_l_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX12-TRUE16-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-TRUE16-NEXT: image_gather4_l v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -752,8 +791,10 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
;
; GFX11-TRUE16-LABEL: gather4_c_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
-; GFX11-TRUE16-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX11-TRUE16-NEXT: image_gather4_c_l v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -766,8 +807,10 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
;
; GFX12-TRUE16-LABEL: gather4_c_l_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
-; GFX12-TRUE16-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-TRUE16-NEXT: image_gather4_c_l v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
index 3d64ef16a3c8c..0e2579f0a1232 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
@@ -285,9 +285,10 @@ main_body:
define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %fragid) {
; GFX11-TRUE16-LABEL: load_2dmsaa_a16:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l ; encoding: [0x01,0x39,0x04,0x7f]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l ; encoding: [0x02,0x39,0x06,0x7e]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l ; encoding: [0x00,0x39,0x04,0x7e]
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l ; encoding: [0x01,0x39,0x04,0x7f]
; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x01,0x61,0xf0,0x02,0x00,0x00,0x00]
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-TRUE16-NEXT: ; return to shader part epilog
@@ -301,8 +302,10 @@ define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16
;
; GFX12-TRUE16-LABEL: load_2dmsaa_a16:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l ; encoding: [0x01,0x39,0x00,0x7f]
-; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l ; encoding: [0x02,0x39,0x06,0x7e]
+; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00]
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
index 437f438efc554..866037e18de1a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -25,23 +25,45 @@ define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s12, exec_lo
-; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: s_mov_b32 s12, exec_lo
-; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-TRUE16-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-FAKE16-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-TRUE16-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-FAKE16-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -139,9 +161,10 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_sample v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -161,9 +184,11 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -207,9 +232,10 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_sample v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -229,9 +255,11 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
+; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -341,9 +369,10 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_sample v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -363,9 +392,11 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX12-TRUE16-NEXT: image_sample v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -402,23 +433,45 @@ define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s12, exec_lo
-; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: s_mov_b32 s12, exec_lo
-; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-TRUE16-NEXT: image_sample_c v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-FAKE16-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-TRUE16-NEXT: image_sample_c v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-FAKE16-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -582,9 +635,10 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_sample_cl v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -604,9 +658,11 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_cl v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -718,9 +774,11 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_sample_c_cl v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -738,9 +796,11 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_c_cl v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -777,23 +837,49 @@ define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_b_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s12, exec_lo
-; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_b_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: s_mov_b32 s12, exec_lo
-; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_b_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-TRUE16-NEXT: image_sample_b v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_b_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-FAKE16-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_b_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-TRUE16-NEXT: image_sample_b v[0:3], [v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_b_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-FAKE16-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -825,9 +911,12 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_sample_b v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -846,8 +935,10 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_b v[0:3], [v2, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -884,23 +975,49 @@ define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_b_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: s_mov_b32 s12, exec_lo
-; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_b_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: s_mov_b32 s12, exec_lo
-; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_b_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-TRUE16-NEXT: image_sample_c_b v[0:3], [v2, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_b_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX11-FAKE16-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_b_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-TRUE16-NEXT: image_sample_c_b v[0:3], [v2, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_b_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: s_mov_b32 s12, exec_lo
+; GFX12-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-FAKE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX12-FAKE16-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -933,8 +1050,10 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_sample_c_b v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -953,8 +1072,10 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_c_b v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -998,9 +1119,12 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: image_sample_b_cl v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1019,8 +1143,10 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_b_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: image_sample_b_cl v[0:3], [v2, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1066,9 +1192,11 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-TRUE16-NEXT: image_sample_b_cl v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
@@ -1089,9 +1217,13 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_b_cl v[0:3], [v2, v1, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1136,8 +1268,10 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1156,8 +1290,10 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v3, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1204,9 +1340,13 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX11-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v3, v1, v2, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1224,9 +1364,13 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: s_mov_b32 s12, exec_lo
; GFX12-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX12-TRUE16-NEXT: s_and_b32 exec_lo, exec_lo, s12
-; GFX12-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_c_b_cl v[0:3], [v3, v1, v2, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1257,17 +1401,41 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -1364,12 +1532,17 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX11-TRUE16-LABEL: sample_d_3d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v5.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[8:9]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v6, v[8:9]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1384,13 +1557,16 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX12-TRUE16-LABEL: sample_d_3d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v[7:9]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v[7:9]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1422,17 +1598,41 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -1518,8 +1718,13 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
;
; GFX11-TRUE16-LABEL: sample_d_cl_1d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
-; GFX11-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v3.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1533,7 +1738,11 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX12-TRUE16-LABEL: sample_d_cl_1d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
-; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1570,9 +1779,10 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
;
; GFX11-TRUE16-LABEL: sample_d_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v5.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v5.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
@@ -1592,10 +1802,12 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
;
; GFX12-TRUE16-LABEL: sample_d_cl_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1631,7 +1843,11 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX11-TRUE16-LABEL: sample_c_d_cl_1d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
-; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1645,7 +1861,11 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX12-TRUE16-LABEL: sample_c_d_cl_1d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
-; GFX12-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1684,10 +1904,12 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
;
; GFX11-TRUE16-LABEL: sample_c_d_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v7.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
-; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1702,9 +1924,10 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
;
; GFX12-TRUE16-LABEL: sample_c_d_cl_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.h, v6.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.h, v6.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
; GFX12-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[7:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
@@ -1790,9 +2013,10 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX11-TRUE16-LABEL: sample_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: image_sample_l v[0:3], v[2:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
@@ -1806,8 +2030,10 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX12-TRUE16-LABEL: sample_l_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX12-TRUE16-NEXT: image_sample_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-TRUE16-NEXT: image_sample_l v[0:3], [v0, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1890,8 +2116,10 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
;
; GFX11-TRUE16-LABEL: sample_c_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
-; GFX11-TRUE16-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX11-TRUE16-NEXT: image_sample_c_l v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1904,8 +2132,10 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
;
; GFX12-TRUE16-LABEL: sample_c_l_2d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
-; GFX12-TRUE16-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-TRUE16-NEXT: image_sample_c_l v[0:3], [v0, v1, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -1933,17 +2163,33 @@ define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_lz_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_lz_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_lz_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: image_sample_lz v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_lz_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_lz_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: image_sample_lz v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_lz_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -2010,17 +2256,33 @@ define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_lz_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_lz_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_lz_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: image_sample_c_lz v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_lz_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_lz_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: image_sample_c_lz v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_lz_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -2102,9 +2364,10 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
;
; GFX11-TRUE16-LABEL: sample_c_d_o_2darray_V1:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
; GFX11-TRUE16-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[8:9]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
@@ -2122,9 +2385,10 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
;
; GFX12-TRUE16-LABEL: sample_c_d_o_2darray_V1:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.h, v5.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
@@ -2173,9 +2437,10 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
;
; GFX11-TRUE16-LABEL: sample_c_d_o_2darray_V2:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
; GFX11-TRUE16-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[8:9]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
@@ -2193,9 +2458,10 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
;
; GFX12-TRUE16-LABEL: sample_c_d_o_2darray_V2:
; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v6.l
-; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.h, v7.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.h, v5.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
@@ -2266,3 +2532,6 @@ declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
index e7b048dda1c1f..ad31829ab059d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
@@ -12,17 +12,37 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xe4,0xf0,0x00,0x00,0x00,0x08]
-; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x00]
-; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l ; encoding: [0x01,0x39,0x06,0x7e]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; encoding: [0x00,0x39,0x02,0x7e]
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xe4,0xf0,0x01,0x00,0x00,0x08,0x03,0x02,0x00,0x00]
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xe4,0xf0,0x00,0x00,0x00,0x08]
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l ; encoding: [0x01,0x39,0x06,0x7e]
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; encoding: [0x00,0x39,0x02,0x7e]
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xce,0xe7,0x00,0x00,0x00,0x04,0x01,0x03,0x02,0x00]
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x00]
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -86,9 +106,13 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX11-TRUE16-LABEL: sample_d_3d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l ; encoding: [0x04,0x39,0x06,0x7f]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v5.l ; encoding: [0x05,0x39,0x12,0x7e]
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l ; encoding: [0x02,0x39,0x08,0x7e]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l ; encoding: [0x01,0x39,0x00,0x7f]
-; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x0f,0xe4,0xf0,0x00,0x00,0x00,0x08,0x02,0x03,0x05,0x06]
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v9, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x0f,0xe4,0xf0,0x00,0x00,0x00,0x08,0x04,0x03,0x09,0x06]
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -103,8 +127,10 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX12-TRUE16-LABEL: sample_d_3d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l ; encoding: [0x04,0x39,0x06,0x7f]
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l ; encoding: [0x01,0x39,0x00,0x7f]
-; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x40,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x02,0x03,0x05]
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l ; encoding: [0x02,0x39,0x08,0x7e]
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x40,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x04,0x03,0x05]
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -127,17 +153,37 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xe8,0xf0,0x00,0x00,0x00,0x08]
-; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x80,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
-; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l ; encoding: [0x02,0x39,0x08,0x7e]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; encoding: [0x01,0x39,0x04,0x7e]
+; GFX11-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xe8,0xf0,0x00,0x00,0x00,0x08,0x02,0x04,0x03,0x00]
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xe8,0xf0,0x00,0x00,0x00,0x08]
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l ; encoding: [0x02,0x39,0x08,0x7e]
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; encoding: [0x01,0x39,0x04,0x7e]
+; GFX12-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x80,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x02,0x04,0x03]
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x80,0xce,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -195,17 +241,37 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_d_cl_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x7c,0xf1,0x00,0x00,0x00,0x08]
-; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_d_cl_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xd7,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
-; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_cl_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l ; encoding: [0x01,0x39,0x08,0x7e]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; encoding: [0x00,0x39,0x02,0x7e]
+; GFX11-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v1, v4, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0x7c,0xf1,0x01,0x00,0x00,0x08,0x04,0x02,0x03,0x00]
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_d_cl_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x7c,0xf1,0x00,0x00,0x00,0x08]
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_cl_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l ; encoding: [0x01,0x39,0x08,0x7e]
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; encoding: [0x00,0x39,0x02,0x7e]
+; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v1, v4, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xd7,0xe7,0x00,0x00,0x00,0x04,0x01,0x04,0x02,0x03]
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_cl_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xd7,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -263,17 +329,37 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_d_cl_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x50,0xf1,0x00,0x00,0x00,0x08]
-; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_d_cl_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0xd5,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
-; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_cl_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l ; encoding: [0x02,0x39,0x0a,0x7e]
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; encoding: [0x01,0x39,0x04,0x7e]
+; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v5, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0x50,0xf1,0x00,0x00,0x00,0x08,0x02,0x05,0x03,0x04]
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_d_cl_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x50,0xf1,0x00,0x00,0x00,0x08]
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_cl_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l ; encoding: [0x02,0x39,0x0a,0x7e]
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; encoding: [0x01,0x39,0x04,0x7e]
+; GFX12-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v5, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0xd5,0xe7,0x00,0x00,0x00,0x04,0x00,0x02,0x05,0x03]
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_cl_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0xd5,0xe7,0x00,0x00,0x00,0x04,0x00,0x01,0x02,0x03]
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -446,3 +532,6 @@ declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
index 45cebaf449d54..428f7d65c12a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
@@ -12,17 +12,37 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -86,9 +106,13 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX11-TRUE16-LABEL: sample_d_3d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v5.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX11-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v9, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -103,8 +127,10 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX12-TRUE16-LABEL: sample_d_3d:
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX12-TRUE16-NEXT: image_sample_d_g16 v[0:3], [v0, v4, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;
@@ -127,17 +153,37 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_d_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_d_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_d_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v2, v4, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -195,17 +241,37 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_d_cl_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_d_cl_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_cl_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v1, v4, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_d_cl_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_cl_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: image_sample_d_cl_g16 v[0:3], [v1, v4, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_cl_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -263,17 +329,37 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: sample_c_d_cl_1d:
-; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: sample_c_d_cl_1d:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_cl_1d:
+; GFX11-TRUE16: ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v5, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: sample_c_d_cl_1d:
+; GFX11-FAKE16: ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_cl_1d:
+; GFX12-TRUE16: ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v2, v5, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_cl_1d:
+; GFX12-FAKE16: ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
@@ -444,3 +530,6 @@ declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
index c905e38cba443..2d9189d570937 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
@@ -494,15 +494,48 @@ main_body:
}
define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
-; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
-; GFX10PLUS: ; %bb.0: ; %main_body
-; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10PLUS-NEXT: s_endpgm
+; GFX10-LABEL: sample_d_1d_g16_nortn:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_endpgm
;
-; GFX12-LABEL: sample_d_1d_g16_nortn:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX12-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: sample_d_1d_g16_nortn:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %main_body
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: image_sample_d_g16 off, [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: sample_d_1d_g16_nortn:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %main_body
+; GFX11-SDAG-FAKE16-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: sample_d_1d_g16_nortn:
+; GFX11-GISEL: ; %bb.0: ; %main_body
+; GFX11-GISEL-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-TRUE16-LABEL: sample_d_1d_g16_nortn:
+; GFX12-SDAG-TRUE16: ; %bb.0: ; %main_body
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: image_sample_d_g16 off, [v1, v3, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX12-SDAG-FAKE16-LABEL: sample_d_1d_g16_nortn:
+; GFX12-SDAG-FAKE16: ; %bb.0: ; %main_body
+; GFX12-SDAG-FAKE16-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: sample_d_1d_g16_nortn:
+; GFX12-GISEL: ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
@@ -534,11 +567,6 @@ declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX11-GISEL-FAKE16: {{.*}}
-; GFX11-SDAG-FAKE16: {{.*}}
-; GFX11-SDAG-TRUE16: {{.*}}
; GFX12-GISEL-FAKE16: {{.*}}
-; GFX12-SDAG-FAKE16: {{.*}}
-; GFX12-SDAG-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
index 03158f1141969..d69406e01da56 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
@@ -428,11 +428,20 @@ define amdgpu_ps void @dpp8_i16(i16 %in, ptr addrspace(1) %out) {
; GFX10-NEXT: global_store_short v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: dpp8_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
+; GFX11-SDAG-LABEL: dpp8_i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b16 v[1:2], v3, off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-LABEL: dpp8_i16:
; GFX12: ; %bb.0:
@@ -621,11 +630,20 @@ define amdgpu_ps void @dpp8_half(half %in, ptr addrspace(1) %out) {
; GFX10-NEXT: global_store_short v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: dpp8_half:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
+; GFX11-SDAG-LABEL: dpp8_half:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-SDAG-NEXT: global_store_b16 v[1:2], v3, off
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: dpp8_half:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-GISEL-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-GISEL-NEXT: s_endpgm
;
; GFX12-LABEL: dpp8_half:
; GFX12: ; %bb.0:
@@ -646,8 +664,11 @@ define amdgpu_ps void @dpp8_bfloat(bfloat %in, ptr addrspace(1) %out) {
;
; GFX11-LABEL: dpp8_bfloat:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: ; implicit-def: $vgpr3
+; GFX11-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_dpp v3, v3 dpp8:[1,0,0,0,0,0,0,0]
+; GFX11-NEXT: global_store_b16 v[1:2], v3, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: dpp8_bfloat:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index 4c6095ee594b0..d3ec936d786a3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -8551,15 +8551,27 @@ define void @v_permlane16_half(ptr addrspace(1) %out, half %src0, i32 %src1, i32
; GFX10-NEXT: global_store_short v[0:1], v2, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_permlane16_half:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v3
-; GFX11-NEXT: v_readfirstlane_b32 s1, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-LABEL: v_permlane16_half:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlane16_b32 v5, v5, s0, s1
+; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v5, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_permlane16_half:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_permlane16_b32 v2, v2, s0, s1
+; GFX11-GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlane16_half:
; GFX12: ; %bb.0:
@@ -8590,15 +8602,27 @@ define void @v_permlanex16_half(ptr addrspace(1) %out, half %src0, i32 %src1, i3
; GFX10-NEXT: global_store_short v[0:1], v2, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_permlanex16_half:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v3
-; GFX11-NEXT: v_readfirstlane_b32 s1, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-LABEL: v_permlanex16_half:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlanex16_b32 v5, v5, s0, s1
+; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v5, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_permlanex16_half:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_permlanex16_b32 v2, v2, s0, s1
+; GFX11-GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlanex16_half:
; GFX12: ; %bb.0:
@@ -8631,12 +8655,14 @@ define void @v_permlane16_bfloat(ptr addrspace(1) %out, bfloat %src0, i32 %src1,
;
; GFX11-LABEL: v_permlane16_bfloat:
; GFX11: ; %bb.0:
+; GFX11-NEXT: ; implicit-def: $vgpr5
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-NEXT: v_mov_b16_e32 v5.l, v2.l
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: v_permlane16_b32 v5, v5, s0, s1
+; GFX11-NEXT: global_store_b16 v[0:1], v5, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlane16_bfloat:
@@ -8670,12 +8696,14 @@ define void @v_permlanex16_bfloat(ptr addrspace(1) %out, bfloat %src0, i32 %src1
;
; GFX11-LABEL: v_permlanex16_bfloat:
; GFX11: ; %bb.0:
+; GFX11-NEXT: ; implicit-def: $vgpr5
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-NEXT: v_mov_b16_e32 v5.l, v2.l
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: v_permlanex16_b32 v5, v5, s0, s1
+; GFX11-NEXT: global_store_b16 v[0:1], v5, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlanex16_bfloat:
@@ -8707,15 +8735,27 @@ define void @v_permlane16_i16(ptr addrspace(1) %out, i16 %src0, i32 %src1, i32 %
; GFX10-NEXT: global_store_short v[0:1], v2, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_permlane16_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v3
-; GFX11-NEXT: v_readfirstlane_b32 s1, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-LABEL: v_permlane16_i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlane16_b32 v5, v5, s0, s1
+; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v5, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_permlane16_i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_permlane16_b32 v2, v2, s0, s1
+; GFX11-GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlane16_i16:
; GFX12: ; %bb.0:
@@ -8746,15 +8786,27 @@ define void @v_permlanex16_i16(ptr addrspace(1) %out, i16 %src0, i32 %src1, i32
; GFX10-NEXT: global_store_short v[0:1], v2, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_permlanex16_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v3
-; GFX11-NEXT: v_readfirstlane_b32 s1, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-LABEL: v_permlanex16_i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlanex16_b32 v5, v5, s0, s1
+; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v5, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_permlanex16_i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v3
+; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_permlanex16_b32 v2, v2, s0, s1
+; GFX11-GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_permlanex16_i16:
; GFX12: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
index 6dd2258420998..5d4bfc30b6515 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
@@ -353,8 +353,11 @@ define amdgpu_kernel void @test_v_f64(ptr addrspace(1) %out, double %src0) #1 {
define void @test_half(ptr addrspace(1) %out, half %src0) {
; GFX11-SDAG-LABEL: test_half:
; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr3
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v2
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v3
; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -372,15 +375,21 @@ define void @test_half(ptr addrspace(1) %out, half %src0) {
define void @test_bfloat(ptr addrspace(1) %out, bfloat %src0) {
; GFX11-SDAG-LABEL: test_bfloat:
; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr3
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v2
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v3
; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_bfloat:
; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr3
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_permlane64_b32 v2, v2
+; GFX11-GISEL-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_permlane64_b32 v2, v3
; GFX11-GISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%v = call bfloat @llvm.amdgcn.permlane64.bf16(bfloat %src0)
@@ -391,8 +400,11 @@ define void @test_bfloat(ptr addrspace(1) %out, bfloat %src0) {
define void @test_i16(ptr addrspace(1) %out, i16 %src0) {
; GFX11-SDAG-LABEL: test_i16:
; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: ; implicit-def: $vgpr3
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v2
+; GFX11-SDAG-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_permlane64_b32 v2, v3
; GFX11-SDAG-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll
index ec7d7d467ffc6..9b8707f5a1508 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll
@@ -31,7 +31,9 @@ define amdgpu_ps void @buffer_store_bf16(ptr addrspace(8) inreg %rsrc, bfloat %d
;
; GFX11-LABEL: buffer_store_bf16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: buffer_store_b16 v0, v1, s[0:3], 0 offen
+; GFX11-NEXT: ; implicit-def: $vgpr2
+; GFX11-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-NEXT: buffer_store_b16 v2, v1, s[0:3], 0 offen
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: buffer_store_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll
index f0031dd3e93c0..78473ec697a5a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll
@@ -85,9 +85,10 @@ define amdgpu_ps <3 x float> @sample_gather(<8 x i32> inreg %rsrc, <4 x i32> inr
define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <8 x i32> inreg %rsrc2, i16 %s.16, i16 %t.16, i16 %fragid) {
; GFX11-TRUE16-LABEL: sample_load:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
@@ -105,9 +106,10 @@ define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX1150-TRUE16-LABEL: sample_load:
; GFX1150-TRUE16: ; %bb.0:
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -127,9 +129,11 @@ define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX12-TRUE16-LABEL: sample_load:
; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
-; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
@@ -159,9 +163,10 @@ define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg
define amdgpu_ps <3 x float> @load_sample(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <8 x i32> inreg %rsrc2, i16 %s.16, i16 %t.16, i16 %fragid) {
; GFX11-TRUE16-LABEL: load_sample:
; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
@@ -179,9 +184,10 @@ define amdgpu_ps <3 x float> @load_sample(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX1150-TRUE16-LABEL: load_sample:
; GFX1150-TRUE16: ; %bb.0:
+; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -201,9 +207,11 @@ define amdgpu_ps <3 x float> @load_sample(<8 x i32> inreg %rsrc, <4 x i32> inreg
;
; GFX12-TRUE16-LABEL: load_sample:
; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
-; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index a10c861601c2c..9e3aa29f3dc73 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -2074,10 +2074,12 @@ define void @test_writelane_half(ptr addrspace(1) %out, half %src, i32 %src1) {
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
-; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1100-SDAG-NEXT: ; implicit-def: $vgpr5
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1100-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2230,10 +2232,12 @@ define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
-; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1100-SDAG-NEXT: ; implicit-def: $vgpr5
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1100-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -2266,10 +2270,12 @@ define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1100-GISEL-NEXT: ; implicit-def: $vgpr5
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1100-GISEL-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v5
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2308,10 +2314,12 @@ define void @test_writelane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
-; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1100-SDAG-NEXT: ; implicit-def: $vgpr5
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1100-SDAG-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index 22f562ab8557b..3eade74cccf3e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -215,9 +215,11 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
;
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i8:
; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index b241b9b800d2a..b055fa8494438 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -833,6 +833,7 @@ define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
; GFX11-TRUE16-NEXT: s_clause 0x1
; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
@@ -995,11 +996,12 @@ define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
; GFX11-TRUE16-NEXT: s_clause 0x1
; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s2, 16
; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.h, s3
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, s3
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_sub_f16_e32 v1.l, s2, v0.l
; GFX11-TRUE16-NEXT: v_sub_f16_e32 v1.h, s3, v0.h
@@ -1007,13 +1009,14 @@ define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s6, |v1.l|, 0.5
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s7, |v1.h|, 0.5
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0, 0x3c00, s6
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, 0, 0x3c00, s7
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, 0, 0x3c00, s6
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, 0, 0x3c00, s7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v2, v1
-; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v3, v4
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v3, v1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v4, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.h, v0.h, v2.l
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 5b2213592f495..579a8dd1e2c19 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -5831,17 +5831,17 @@ define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v4i1_to_v4i64:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
-; GFX6-NEXT: s_mov_b32 s10, s6
-; GFX6-NEXT: s_mov_b32 s11, s7
+; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s10, s2
+; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s8, s2
-; GFX6-NEXT: s_mov_b32 s9, s3
+; GFX6-NEXT: s_mov_b32 s8, s6
+; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b32 s0, s4
+; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 3, v0
@@ -5854,8 +5854,8 @@ define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v4i1_to_v4i64:
@@ -6215,11 +6215,12 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
-; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
+; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-NEXT: v_mov_b32_e32 v17, s1
; GFX8-NEXT: v_mov_b32_e32 v16, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_readfirstlane_b32 s3, v0
+; GFX8-NEXT: v_readfirstlane_b32 s3, v2
; GFX8-NEXT: s_lshr_b32 s2, s3, 6
; GFX8-NEXT: s_lshr_b32 s4, s3, 7
; GFX8-NEXT: s_lshr_b32 s6, s3, 4
@@ -6326,9 +6327,10 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX12-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT: v_mov_b32_e32 v16, 0
+; GFX12-NEXT: v_mov_b32_e32 v18, 0
+; GFX12-NEXT: ; implicit-def: $vgpr16_vgpr17
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_u8 v0, v16, s[2:3]
+; GFX12-NEXT: global_load_u8 v0, v18, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s3, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6359,18 +6361,19 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GFX12-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX12-NEXT: s_clause 0x3
-; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
-; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
-; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
-; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
+; GFX12-NEXT: global_store_b128 v18, v[0:3], s[0:1] offset:48
+; GFX12-NEXT: global_store_b128 v18, v[4:7], s[0:1] offset:32
+; GFX12-NEXT: global_store_b128 v18, v[8:11], s[0:1] offset:16
+; GFX12-NEXT: global_store_b128 v18, v[12:15], s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v16, 0
+; GFX1250-NEXT: v_mov_b32_e32 v18, 0
+; GFX1250-NEXT: ; implicit-def: $vgpr16_vgpr17
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u8 v0, v16, s[2:3]
+; GFX1250-NEXT: global_load_u8 v0, v18, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6399,10 +6402,10 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX1250-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_ashrrev_i32 v13, 31, v12
; GFX1250-NEXT: v_mov_b32_e32 v15, s15
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1]
+; GFX1250-NEXT: global_store_b128 v18, v[0:3], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v18, v[4:7], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v18, v[8:11], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v18, v[12:15], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <8 x i1>, ptr addrspace(4) %in
%ext = sext <8 x i1> %load to <8 x i64>
@@ -6823,6 +6826,7 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
+; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX8-NEXT: v_mov_b32_e32 v19, s1
; GFX8-NEXT: v_mov_b32_e32 v18, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -7035,6 +7039,7 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX12-NEXT: global_load_u16 v0, v32, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s3, v0
+; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX12-NEXT: s_lshr_b32 s4, s3, 15
; GFX12-NEXT: s_lshr_b32 s2, s3, 14
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
@@ -7106,6 +7111,7 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_load_u16 v0, v32, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
+; GFX1250-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mov_b32_e32 v28, s3
; GFX1250-NEXT: s_lshr_b32 s2, s3, 14
@@ -7799,16 +7805,16 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshr_b32 s38, s4, 30
+; GFX6-NEXT: s_lshr_b32 s36, s4, 30
; GFX6-NEXT: s_lshr_b32 s40, s4, 31
-; GFX6-NEXT: s_lshr_b32 s34, s4, 28
-; GFX6-NEXT: s_lshr_b32 s36, s4, 29
-; GFX6-NEXT: s_lshr_b32 s28, s4, 26
-; GFX6-NEXT: s_lshr_b32 s30, s4, 27
-; GFX6-NEXT: s_lshr_b32 s24, s4, 24
-; GFX6-NEXT: s_lshr_b32 s26, s4, 25
+; GFX6-NEXT: s_lshr_b32 s30, s4, 28
+; GFX6-NEXT: s_lshr_b32 s38, s4, 29
+; GFX6-NEXT: s_lshr_b32 s26, s4, 26
+; GFX6-NEXT: s_lshr_b32 s34, s4, 27
+; GFX6-NEXT: s_lshr_b32 s22, s4, 24
+; GFX6-NEXT: s_lshr_b32 s28, s4, 25
; GFX6-NEXT: s_lshr_b32 s20, s4, 22
-; GFX6-NEXT: s_lshr_b32 s22, s4, 23
+; GFX6-NEXT: s_lshr_b32 s24, s4, 23
; GFX6-NEXT: s_lshr_b32 s18, s4, 20
; GFX6-NEXT: s_lshr_b32 s6, s4, 21
; GFX6-NEXT: s_lshr_b32 s8, s4, 18
@@ -7821,48 +7827,48 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s45
; GFX6-NEXT: s_lshr_b32 s44, s4, 12
-; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v2, s38
-; GFX6-NEXT: v_mov_b32_e32 v3, s39
-; GFX6-NEXT: s_lshr_b32 s38, s4, 13
+; GFX6-NEXT: v_mov_b32_e32 v2, s36
+; GFX6-NEXT: v_mov_b32_e32 v3, s37
+; GFX6-NEXT: s_lshr_b32 s36, s4, 13
; GFX6-NEXT: v_mov_b32_e32 v4, s40
; GFX6-NEXT: v_mov_b32_e32 v5, s41
; GFX6-NEXT: s_lshr_b32 s40, s4, 10
-; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v6, s34
-; GFX6-NEXT: v_mov_b32_e32 v7, s35
-; GFX6-NEXT: s_lshr_b32 s34, s4, 11
-; GFX6-NEXT: v_mov_b32_e32 v8, s36
-; GFX6-NEXT: v_mov_b32_e32 v9, s37
-; GFX6-NEXT: s_lshr_b32 s36, s4, 8
-; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v10, s28
-; GFX6-NEXT: v_mov_b32_e32 v11, s29
-; GFX6-NEXT: s_lshr_b32 s28, s4, 9
-; GFX6-NEXT: v_mov_b32_e32 v12, s30
-; GFX6-NEXT: v_mov_b32_e32 v13, s31
-; GFX6-NEXT: s_lshr_b32 s30, s4, 6
-; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v6, s30
+; GFX6-NEXT: v_mov_b32_e32 v7, s31
+; GFX6-NEXT: s_lshr_b32 s30, s4, 11
+; GFX6-NEXT: v_mov_b32_e32 v8, s38
+; GFX6-NEXT: v_mov_b32_e32 v9, s39
+; GFX6-NEXT: s_lshr_b32 s38, s4, 8
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v14, s24
-; GFX6-NEXT: v_mov_b32_e32 v15, s25
-; GFX6-NEXT: s_lshr_b32 s24, s4, 7
-; GFX6-NEXT: v_mov_b32_e32 v16, s26
-; GFX6-NEXT: v_mov_b32_e32 v17, s27
-; GFX6-NEXT: s_lshr_b32 s26, s4, 4
+; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v10, s26
+; GFX6-NEXT: v_mov_b32_e32 v11, s27
+; GFX6-NEXT: s_lshr_b32 s26, s4, 9
+; GFX6-NEXT: v_mov_b32_e32 v12, s34
+; GFX6-NEXT: v_mov_b32_e32 v13, s35
+; GFX6-NEXT: s_lshr_b32 s34, s4, 6
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v14, s22
+; GFX6-NEXT: v_mov_b32_e32 v15, s23
+; GFX6-NEXT: s_lshr_b32 s22, s4, 7
+; GFX6-NEXT: v_mov_b32_e32 v16, s28
+; GFX6-NEXT: v_mov_b32_e32 v17, s29
+; GFX6-NEXT: s_lshr_b32 s28, s4, 4
+; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s20
; GFX6-NEXT: v_mov_b32_e32 v3, s21
; GFX6-NEXT: s_lshr_b32 s20, s4, 5
-; GFX6-NEXT: v_mov_b32_e32 v4, s22
-; GFX6-NEXT: v_mov_b32_e32 v5, s23
-; GFX6-NEXT: s_lshr_b32 s22, s4, 2
+; GFX6-NEXT: v_mov_b32_e32 v4, s24
+; GFX6-NEXT: v_mov_b32_e32 v5, s25
+; GFX6-NEXT: s_lshr_b32 s24, s4, 2
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -7872,16 +7878,16 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX6-NEXT: s_lshr_b32 s4, s4, 1
; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
@@ -7917,36 +7923,36 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s44
; GFX6-NEXT: v_mov_b32_e32 v3, s45
-; GFX6-NEXT: v_mov_b32_e32 v4, s38
-; GFX6-NEXT: v_mov_b32_e32 v5, s39
+; GFX6-NEXT: v_mov_b32_e32 v4, s36
+; GFX6-NEXT: v_mov_b32_e32 v5, s37
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s40
; GFX6-NEXT: v_mov_b32_e32 v3, s41
-; GFX6-NEXT: v_mov_b32_e32 v4, s34
-; GFX6-NEXT: v_mov_b32_e32 v5, s35
+; GFX6-NEXT: v_mov_b32_e32 v4, s30
+; GFX6-NEXT: v_mov_b32_e32 v5, s31
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s36
-; GFX6-NEXT: v_mov_b32_e32 v3, s37
-; GFX6-NEXT: v_mov_b32_e32 v4, s28
-; GFX6-NEXT: v_mov_b32_e32 v5, s29
+; GFX6-NEXT: v_mov_b32_e32 v2, s38
+; GFX6-NEXT: v_mov_b32_e32 v3, s39
+; GFX6-NEXT: v_mov_b32_e32 v4, s26
+; GFX6-NEXT: v_mov_b32_e32 v5, s27
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s30
-; GFX6-NEXT: v_mov_b32_e32 v3, s31
-; GFX6-NEXT: v_mov_b32_e32 v4, s24
-; GFX6-NEXT: v_mov_b32_e32 v5, s25
+; GFX6-NEXT: v_mov_b32_e32 v2, s34
+; GFX6-NEXT: v_mov_b32_e32 v3, s35
+; GFX6-NEXT: v_mov_b32_e32 v4, s22
+; GFX6-NEXT: v_mov_b32_e32 v5, s23
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s26
-; GFX6-NEXT: v_mov_b32_e32 v3, s27
+; GFX6-NEXT: v_mov_b32_e32 v2, s28
+; GFX6-NEXT: v_mov_b32_e32 v3, s29
; GFX6-NEXT: v_mov_b32_e32 v4, s20
; GFX6-NEXT: v_mov_b32_e32 v5, s21
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s22
-; GFX6-NEXT: v_mov_b32_e32 v3, s23
+; GFX6-NEXT: v_mov_b32_e32 v2, s24
+; GFX6-NEXT: v_mov_b32_e32 v3, s25
; GFX6-NEXT: v_mov_b32_e32 v4, s18
; GFX6-NEXT: v_mov_b32_e32 v5, s19
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
@@ -7962,22 +7968,22 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_lshr_b32 s44, s2, 30
-; GFX8-NEXT: s_lshr_b32 s46, s2, 31
-; GFX8-NEXT: s_lshr_b32 s48, s2, 28
-; GFX8-NEXT: s_lshr_b32 s50, s2, 29
-; GFX8-NEXT: s_lshr_b32 s52, s2, 26
-; GFX8-NEXT: s_lshr_b32 s54, s2, 27
-; GFX8-NEXT: s_lshr_b32 s56, s2, 24
-; GFX8-NEXT: s_lshr_b32 s58, s2, 25
+; GFX8-NEXT: s_lshr_b32 s36, s2, 30
+; GFX8-NEXT: s_lshr_b32 s38, s2, 31
+; GFX8-NEXT: s_lshr_b32 s40, s2, 28
+; GFX8-NEXT: s_lshr_b32 s44, s2, 29
+; GFX8-NEXT: s_lshr_b32 s48, s2, 26
+; GFX8-NEXT: s_lshr_b32 s50, s2, 27
+; GFX8-NEXT: s_lshr_b32 s52, s2, 24
+; GFX8-NEXT: s_lshr_b32 s56, s2, 25
; GFX8-NEXT: s_lshr_b32 s60, s2, 22
; GFX8-NEXT: s_lshr_b32 s62, s2, 23
; GFX8-NEXT: s_lshr_b32 s64, s2, 20
; GFX8-NEXT: s_lshr_b32 s66, s2, 21
-; GFX8-NEXT: s_lshr_b32 s42, s2, 18
-; GFX8-NEXT: s_lshr_b32 s40, s2, 19
-; GFX8-NEXT: s_lshr_b32 s38, s2, 16
-; GFX8-NEXT: s_lshr_b32 s36, s2, 17
+; GFX8-NEXT: s_lshr_b32 s58, s2, 18
+; GFX8-NEXT: s_lshr_b32 s54, s2, 19
+; GFX8-NEXT: s_lshr_b32 s46, s2, 16
+; GFX8-NEXT: s_lshr_b32 s42, s2, 17
; GFX8-NEXT: s_lshr_b32 s34, s2, 14
; GFX8-NEXT: s_lshr_b32 s30, s2, 15
; GFX8-NEXT: s_lshr_b32 s28, s2, 12
@@ -8009,94 +8015,92 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX8-NEXT: v_mov_b32_e32 v0, s44
-; GFX8-NEXT: s_add_u32 s44, s0, 0xf0
-; GFX8-NEXT: v_mov_b32_e32 v1, s45
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
-; GFX8-NEXT: v_mov_b32_e32 v2, s46
-; GFX8-NEXT: v_mov_b32_e32 v3, s47
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: s_add_u32 s44, s0, 0xe0
+; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX8-NEXT: v_mov_b32_e32 v0, s36
+; GFX8-NEXT: s_add_u32 s36, s0, 0xf0
+; GFX8-NEXT: v_mov_b32_e32 v1, s37
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
+; GFX8-NEXT: v_mov_b32_e32 v2, s38
+; GFX8-NEXT: v_mov_b32_e32 v3, s39
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0xe0
+; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
+; GFX8-NEXT: v_mov_b32_e32 v0, s40
+; GFX8-NEXT: v_mov_b32_e32 v1, s41
+; GFX8-NEXT: v_mov_b32_e32 v2, s44
+; GFX8-NEXT: v_mov_b32_e32 v3, s45
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v0, s48
; GFX8-NEXT: v_mov_b32_e32 v1, s49
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v3, s51
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: s_add_u32 s44, s0, 0xd0
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v0, s52
; GFX8-NEXT: v_mov_b32_e32 v1, s53
-; GFX8-NEXT: v_mov_b32_e32 v2, s54
-; GFX8-NEXT: v_mov_b32_e32 v3, s55
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: s_add_u32 s44, s0, 0xc0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
-; GFX8-NEXT: v_mov_b32_e32 v0, s56
-; GFX8-NEXT: v_mov_b32_e32 v1, s57
-; GFX8-NEXT: v_mov_b32_e32 v2, s58
-; GFX8-NEXT: v_mov_b32_e32 v3, s59
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: s_add_u32 s44, s0, 0xb0
+; GFX8-NEXT: v_mov_b32_e32 v2, s56
+; GFX8-NEXT: v_mov_b32_e32 v3, s57
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v0, s60
; GFX8-NEXT: v_mov_b32_e32 v1, s61
; GFX8-NEXT: v_mov_b32_e32 v2, s62
; GFX8-NEXT: v_mov_b32_e32 v3, s63
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: s_add_u32 s44, s0, 0xa0
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_addc_u32 s45, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s44
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v1, s65
; GFX8-NEXT: v_mov_b32_e32 v2, s66
; GFX8-NEXT: v_mov_b32_e32 v3, s67
-; GFX8-NEXT: v_mov_b32_e32 v5, s45
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_nop 0
-; GFX8-NEXT: v_mov_b32_e32 v2, s40
-; GFX8-NEXT: s_add_u32 s40, s0, 0x90
-; GFX8-NEXT: v_mov_b32_e32 v3, s41
-; GFX8-NEXT: s_addc_u32 s41, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v4, s40
-; GFX8-NEXT: v_mov_b32_e32 v0, s42
-; GFX8-NEXT: v_mov_b32_e32 v1, s43
-; GFX8-NEXT: v_mov_b32_e32 v5, s41
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
+; GFX8-NEXT: s_add_u32 s36, s0, 0x90
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: s_nop 0
-; GFX8-NEXT: v_mov_b32_e32 v2, s36
+; GFX8-NEXT: s_addc_u32 s37, s1, 0
+; GFX8-NEXT: v_mov_b32_e32 v4, s36
+; GFX8-NEXT: v_mov_b32_e32 v0, s58
+; GFX8-NEXT: v_mov_b32_e32 v1, s59
+; GFX8-NEXT: v_mov_b32_e32 v2, s54
+; GFX8-NEXT: v_mov_b32_e32 v3, s55
+; GFX8-NEXT: v_mov_b32_e32 v5, s37
; GFX8-NEXT: s_add_u32 s36, s0, 0x80
-; GFX8-NEXT: v_mov_b32_e32 v3, s37
+; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s37, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s36
-; GFX8-NEXT: v_mov_b32_e32 v0, s38
-; GFX8-NEXT: v_mov_b32_e32 v1, s39
+; GFX8-NEXT: v_mov_b32_e32 v0, s46
+; GFX8-NEXT: v_mov_b32_e32 v1, s47
+; GFX8-NEXT: v_mov_b32_e32 v2, s42
+; GFX8-NEXT: v_mov_b32_e32 v3, s43
; GFX8-NEXT: v_mov_b32_e32 v5, s37
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
@@ -8374,38 +8378,38 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s34, s2, 30
-; GFX12-NEXT: s_lshr_b32 s36, s2, 31
-; GFX12-NEXT: s_lshr_b32 s38, s2, 28
-; GFX12-NEXT: s_lshr_b32 s40, s2, 29
-; GFX12-NEXT: s_lshr_b32 s42, s2, 26
-; GFX12-NEXT: s_lshr_b32 s44, s2, 27
-; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX12-NEXT: s_lshr_b32 s24, s2, 30
+; GFX12-NEXT: s_lshr_b32 s28, s2, 31
+; GFX12-NEXT: s_lshr_b32 s34, s2, 28
+; GFX12-NEXT: s_lshr_b32 s36, s2, 29
+; GFX12-NEXT: s_lshr_b32 s38, s2, 26
+; GFX12-NEXT: s_lshr_b32 s42, s2, 27
+; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX12-NEXT: s_lshr_b32 s46, s2, 24
; GFX12-NEXT: s_lshr_b32 s48, s2, 25
-; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
-; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s25
; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s37
-; GFX12-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v5, s39
-; GFX12-NEXT: s_lshr_b32 s26, s2, 22
+; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v3, s29
+; GFX12-NEXT: v_dual_mov_b32 v2, s28 :: v_dual_mov_b32 v5, s35
+; GFX12-NEXT: s_lshr_b32 s20, s2, 22
; GFX12-NEXT: s_lshr_b32 s50, s2, 23
; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v4, s38 :: v_dual_mov_b32 v7, s41
-; GFX12-NEXT: v_dual_mov_b32 v6, s40 :: v_dual_mov_b32 v9, s43
+; GFX12-NEXT: v_dual_mov_b32 v4, s34 :: v_dual_mov_b32 v7, s37
+; GFX12-NEXT: v_dual_mov_b32 v6, s36 :: v_dual_mov_b32 v9, s39
; GFX12-NEXT: s_lshr_b32 s52, s2, 20
; GFX12-NEXT: s_lshr_b32 s54, s2, 21
-; GFX12-NEXT: v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45
-; GFX12-NEXT: v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s47
+; GFX12-NEXT: v_dual_mov_b32 v8, s38 :: v_dual_mov_b32 v11, s43
+; GFX12-NEXT: v_dual_mov_b32 v10, s42 :: v_dual_mov_b32 v13, s47
; GFX12-NEXT: s_lshr_b32 s56, s2, 18
; GFX12-NEXT: s_lshr_b32 s58, s2, 19
; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s46 :: v_dual_mov_b32 v15, s49
-; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX12-NEXT: v_mov_b32_e32 v14, s48
; GFX12-NEXT: s_lshr_b32 s60, s2, 16
; GFX12-NEXT: s_lshr_b32 s62, s2, 17
@@ -8420,18 +8424,18 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
-; GFX12-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v3, s51
-; GFX12-NEXT: v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v2, s50
+; GFX12-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v3, s51
+; GFX12-NEXT: v_dual_mov_b32 v1, s21 :: v_dual_mov_b32 v2, s50
; GFX12-NEXT: v_mov_b32_e32 v5, s53
-; GFX12-NEXT: s_lshr_b32 s30, s2, 12
-; GFX12-NEXT: s_lshr_b32 s28, s2, 13
-; GFX12-NEXT: s_lshr_b32 s24, s2, 10
-; GFX12-NEXT: s_lshr_b32 s22, s2, 11
+; GFX12-NEXT: s_lshr_b32 s44, s2, 12
+; GFX12-NEXT: s_lshr_b32 s40, s2, 13
+; GFX12-NEXT: s_lshr_b32 s30, s2, 10
+; GFX12-NEXT: s_lshr_b32 s26, s2, 11
; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s55
; GFX12-NEXT: v_dual_mov_b32 v6, s54 :: v_dual_mov_b32 v9, s57
-; GFX12-NEXT: s_lshr_b32 s20, s2, 8
+; GFX12-NEXT: s_lshr_b32 s22, s2, 8
; GFX12-NEXT: s_lshr_b32 s18, s2, 9
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
@@ -8439,24 +8443,24 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s61
; GFX12-NEXT: s_lshr_b32 s16, s2, 6
; GFX12-NEXT: s_lshr_b32 s14, s2, 7
-; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s60 :: v_dual_mov_b32 v15, s63
; GFX12-NEXT: v_dual_mov_b32 v14, s62 :: v_dual_mov_b32 v17, s65
; GFX12-NEXT: s_lshr_b32 s12, s2, 4
; GFX12-NEXT: s_lshr_b32 s10, s2, 5
; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v16, s64 :: v_dual_mov_b32 v19, s67
-; GFX12-NEXT: v_dual_mov_b32 v18, s66 :: v_dual_mov_b32 v21, s31
+; GFX12-NEXT: v_dual_mov_b32 v18, s66 :: v_dual_mov_b32 v21, s45
; GFX12-NEXT: s_lshr_b32 s8, s2, 2
; GFX12-NEXT: s_lshr_b32 s6, s2, 3
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s29
-; GFX12-NEXT: v_mov_b32_e32 v22, s28
+; GFX12-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v23, s41
+; GFX12-NEXT: v_mov_b32_e32 v22, s40
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
@@ -8464,13 +8468,13 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
-; GFX12-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v3, s23
-; GFX12-NEXT: v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v2, s22
-; GFX12-NEXT: v_mov_b32_e32 v5, s21
+; GFX12-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v3, s27
+; GFX12-NEXT: v_dual_mov_b32 v1, s31 :: v_dual_mov_b32 v2, s26
+; GFX12-NEXT: v_mov_b32_e32 v5, s23
; GFX12-NEXT: s_lshr_b32 s68, s2, 1
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s19
+; GFX12-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v7, s19
; GFX12-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v9, s17
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
@@ -8499,39 +8503,39 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_lshr_b32 s34, s2, 30
-; GFX1250-NEXT: s_lshr_b32 s36, s2, 31
-; GFX1250-NEXT: s_lshr_b32 s38, s2, 28
-; GFX1250-NEXT: s_lshr_b32 s40, s2, 29
-; GFX1250-NEXT: s_lshr_b32 s42, s2, 26
-; GFX1250-NEXT: s_lshr_b32 s44, s2, 27
-; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s24, s2, 30
+; GFX1250-NEXT: s_lshr_b32 s28, s2, 31
+; GFX1250-NEXT: s_lshr_b32 s34, s2, 28
+; GFX1250-NEXT: s_lshr_b32 s36, s2, 29
+; GFX1250-NEXT: s_lshr_b32 s38, s2, 26
+; GFX1250-NEXT: s_lshr_b32 s42, s2, 27
+; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX1250-NEXT: s_lshr_b32 s46, s2, 24
; GFX1250-NEXT: s_lshr_b32 s48, s2, 25
-; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s34
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v2, s24
; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v1, s35 :: v_dual_mov_b32 v2, s36
-; GFX1250-NEXT: v_dual_mov_b32 v3, s37 :: v_dual_mov_b32 v4, s38
-; GFX1250-NEXT: s_lshr_b32 s26, s2, 22
+; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v4, s28
+; GFX1250-NEXT: v_dual_mov_b32 v5, s29 :: v_dual_mov_b32 v6, s34
+; GFX1250-NEXT: s_lshr_b32 s20, s2, 22
; GFX1250-NEXT: s_lshr_b32 s50, s2, 23
; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s39 :: v_dual_mov_b32 v6, s40
-; GFX1250-NEXT: v_dual_mov_b32 v7, s41 :: v_dual_mov_b32 v8, s42
+; GFX1250-NEXT: v_dual_mov_b32 v7, s35 :: v_dual_mov_b32 v8, s36
+; GFX1250-NEXT: v_dual_mov_b32 v9, s37 :: v_dual_mov_b32 v10, s38
; GFX1250-NEXT: s_lshr_b32 s52, s2, 20
; GFX1250-NEXT: s_lshr_b32 s54, s2, 21
-; GFX1250-NEXT: v_dual_mov_b32 v9, s43 :: v_dual_mov_b32 v10, s44
-; GFX1250-NEXT: v_dual_mov_b32 v11, s45 :: v_dual_mov_b32 v12, s46
+; GFX1250-NEXT: v_dual_mov_b32 v11, s39 :: v_dual_mov_b32 v12, s42
+; GFX1250-NEXT: v_dual_mov_b32 v13, s43 :: v_dual_mov_b32 v14, s46
; GFX1250-NEXT: s_lshr_b32 s56, s2, 18
; GFX1250-NEXT: s_lshr_b32 s58, s2, 19
; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v13, s47 :: v_dual_mov_b32 v14, s48
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX1250-NEXT: v_mov_b32_e32 v15, s49
+; GFX1250-NEXT: v_dual_mov_b32 v15, s47 :: v_dual_mov_b32 v16, s48
+; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX1250-NEXT: v_mov_b32_e32 v17, s49
; GFX1250-NEXT: s_lshr_b32 s60, s2, 16
; GFX1250-NEXT: s_lshr_b32 s62, s2, 17
; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
@@ -8541,91 +8545,91 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:240
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[0:1] offset:224
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[0:1] offset:208
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v1, s27
-; GFX1250-NEXT: v_dual_mov_b32 v2, s50 :: v_dual_mov_b32 v3, s51
+; GFX1250-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v3, s21
+; GFX1250-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_mov_b32_e32 v4, s52
-; GFX1250-NEXT: s_lshr_b32 s30, s2, 12
-; GFX1250-NEXT: s_lshr_b32 s28, s2, 13
-; GFX1250-NEXT: s_lshr_b32 s24, s2, 10
-; GFX1250-NEXT: s_lshr_b32 s22, s2, 11
+; GFX1250-NEXT: v_mov_b32_e32 v6, s52
+; GFX1250-NEXT: s_lshr_b32 s44, s2, 12
+; GFX1250-NEXT: s_lshr_b32 s40, s2, 13
+; GFX1250-NEXT: s_lshr_b32 s30, s2, 10
+; GFX1250-NEXT: s_lshr_b32 s26, s2, 11
; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s53 :: v_dual_mov_b32 v6, s54
+; GFX1250-NEXT: v_dual_mov_b32 v7, s53 :: v_dual_mov_b32 v8, s54
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v7, s55 :: v_dual_mov_b32 v8, s56
-; GFX1250-NEXT: s_lshr_b32 s20, s2, 8
+; GFX1250-NEXT: v_dual_mov_b32 v9, s55 :: v_dual_mov_b32 v10, s56
+; GFX1250-NEXT: s_lshr_b32 s22, s2, 8
; GFX1250-NEXT: s_lshr_b32 s18, s2, 9
; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v9, s57 :: v_dual_mov_b32 v10, s58
+; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v12, s58
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v12, s60
+; GFX1250-NEXT: v_dual_mov_b32 v13, s59 :: v_dual_mov_b32 v14, s60
; GFX1250-NEXT: s_lshr_b32 s16, s2, 6
; GFX1250-NEXT: s_lshr_b32 s14, s2, 7
-; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v13, s61 :: v_dual_mov_b32 v14, s62
-; GFX1250-NEXT: v_dual_mov_b32 v15, s63 :: v_dual_mov_b32 v16, s64
+; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s61 :: v_dual_mov_b32 v16, s62
+; GFX1250-NEXT: v_dual_mov_b32 v17, s63 :: v_dual_mov_b32 v18, s64
; GFX1250-NEXT: s_lshr_b32 s12, s2, 4
; GFX1250-NEXT: s_lshr_b32 s10, s2, 5
; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v17, s65 :: v_dual_mov_b32 v18, s66
-; GFX1250-NEXT: v_dual_mov_b32 v19, s67 :: v_dual_mov_b32 v20, s30
+; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v19, s65 :: v_dual_mov_b32 v20, s66
+; GFX1250-NEXT: v_dual_mov_b32 v21, s67 :: v_dual_mov_b32 v22, s44
; GFX1250-NEXT: s_lshr_b32 s8, s2, 2
; GFX1250-NEXT: s_lshr_b32 s6, s2, 3
; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v21, s31 :: v_dual_mov_b32 v22, s28
-; GFX1250-NEXT: v_mov_b32_e32 v23, s29
+; GFX1250-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v24, s40
+; GFX1250-NEXT: v_mov_b32_e32 v25, s41
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
-; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
-; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:176
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[0:1] offset:160
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[0:1] offset:144
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[0:1] offset:128
+; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[0:1] offset:112
+; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[0:1] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
-; GFX1250-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23
+; GFX1250-NEXT: v_dual_mov_b32 v2, s30 :: v_dual_mov_b32 v3, s31
+; GFX1250-NEXT: v_dual_mov_b32 v4, s26 :: v_dual_mov_b32 v5, s27
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s20
+; GFX1250-NEXT: v_mov_b32_e32 v6, s22
; GFX1250-NEXT: s_lshr_b32 s68, s2, 1
; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v6, s18
+; GFX1250-NEXT: v_dual_mov_b32 v7, s23 :: v_dual_mov_b32 v8, s18
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s19 :: v_dual_mov_b32 v8, s16
+; GFX1250-NEXT: v_dual_mov_b32 v9, s19 :: v_dual_mov_b32 v10, s16
; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v9, s17 :: v_dual_mov_b32 v10, s14
+; GFX1250-NEXT: v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v12, s14
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v12, s12
+; GFX1250-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v14, s12
; GFX1250-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v14, s10
+; GFX1250-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v16, s10
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v16, s8
-; GFX1250-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v18, s6
+; GFX1250-NEXT: v_dual_mov_b32 v17, s11 :: v_dual_mov_b32 v18, s8
+; GFX1250-NEXT: v_dual_mov_b32 v19, s9 :: v_dual_mov_b32 v20, s6
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v20, s4
-; GFX1250-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v22, s2
-; GFX1250-NEXT: v_mov_b32_e32 v23, s3
+; GFX1250-NEXT: v_dual_mov_b32 v21, s7 :: v_dual_mov_b32 v22, s4
+; GFX1250-NEXT: v_dual_mov_b32 v23, s5 :: v_dual_mov_b32 v24, s2
+; GFX1250-NEXT: v_mov_b32_e32 v25, s3
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[0:1]
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[0:1] offset:80
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[0:1] offset:64
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <32 x i1>, ptr addrspace(4) %in
%ext = sext <32 x i1> %load to <32 x i64>
@@ -9668,13 +9672,15 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_lshr_b32 s4, s3, 31
; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001e
-; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004
-; GFX1250-NEXT: s_and_b32 s7, s2, 1
+; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10006
+; GFX1250-NEXT: v_mov_b32_e32 v7, v1
+; GFX1250-NEXT: s_bfe_u32 s7, s2, 0x10004
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001d
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001c
+; GFX1250-NEXT: s_and_b32 s8, s2, 1
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
@@ -9798,21 +9804,20 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10007
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10006
-; GFX1250-NEXT: v_mov_b32_e32 v7, v1
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_mov_b32 s4, s3
-; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10001
+; GFX1250-NEXT: s_bfe_u32 s6, s3, 0x10001
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10005
-; GFX1250-NEXT: v_mov_b32_e32 v6, s5
+; GFX1250-NEXT: v_mov_b32_e32 v6, s6
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v2, s3
+; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s3
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10003
-; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10001
+; GFX1250-NEXT: s_bfe_u32 s7, s2, 0x10001
; GFX1250-NEXT: s_bfe_u32 s2, s2, 0x10002
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX1250-NEXT: s_wait_xcnt 0x0
@@ -9822,7 +9827,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s6
+; GFX1250-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v2, s7
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:256
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1]
@@ -9841,256 +9846,257 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: ; implicit-def: $sgpr40_sgpr41
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshr_b32 s42, s5, 30
-; GFX6-NEXT: s_lshr_b32 s36, s4, 30
-; GFX6-NEXT: s_lshr_b32 s38, s4, 31
-; GFX6-NEXT: s_lshr_b32 s30, s4, 28
-; GFX6-NEXT: s_lshr_b32 s34, s4, 29
-; GFX6-NEXT: s_lshr_b32 s26, s4, 26
-; GFX6-NEXT: s_lshr_b32 s28, s4, 27
-; GFX6-NEXT: s_lshr_b32 s22, s4, 24
-; GFX6-NEXT: s_lshr_b32 s24, s4, 25
-; GFX6-NEXT: s_lshr_b32 s18, s4, 22
-; GFX6-NEXT: s_lshr_b32 s20, s4, 23
-; GFX6-NEXT: s_lshr_b32 s14, s4, 20
-; GFX6-NEXT: s_lshr_b32 s16, s4, 21
-; GFX6-NEXT: s_lshr_b32 s10, s4, 18
-; GFX6-NEXT: s_lshr_b32 s12, s4, 19
+; GFX6-NEXT: s_lshr_b32 s34, s4, 30
+; GFX6-NEXT: s_lshr_b32 s36, s4, 31
+; GFX6-NEXT: s_lshr_b32 s28, s4, 28
+; GFX6-NEXT: s_lshr_b32 s30, s4, 29
+; GFX6-NEXT: s_lshr_b32 s24, s4, 26
+; GFX6-NEXT: s_lshr_b32 s26, s4, 27
+; GFX6-NEXT: s_lshr_b32 s20, s4, 24
+; GFX6-NEXT: s_lshr_b32 s22, s4, 25
+; GFX6-NEXT: s_lshr_b32 s16, s4, 22
+; GFX6-NEXT: s_lshr_b32 s18, s4, 23
+; GFX6-NEXT: s_lshr_b32 s12, s4, 20
+; GFX6-NEXT: s_lshr_b32 s14, s4, 21
+; GFX6-NEXT: s_lshr_b32 s8, s4, 18
+; GFX6-NEXT: s_lshr_b32 s10, s4, 19
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
-; GFX6-NEXT: s_lshr_b32 s8, s4, 17
; GFX6-NEXT: s_ashr_i32 s7, s5, 31
; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v4, s7
-; GFX6-NEXT: s_lshr_b32 s40, s4, 14
+; GFX6-NEXT: s_lshr_b32 s38, s4, 17
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s45
-; GFX6-NEXT: s_mov_b32 s44, s5
-; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v6, s44
-; GFX6-NEXT: v_mov_b32_e32 v7, s45
-; GFX6-NEXT: s_lshr_b32 s44, s4, 15
-; GFX6-NEXT: v_mov_b32_e32 v2, s42
-; GFX6-NEXT: v_mov_b32_e32 v3, s43
-; GFX6-NEXT: s_lshr_b32 s42, s4, 12
+; GFX6-NEXT: s_lshr_b32 s44, s4, 14
+; GFX6-NEXT: s_mov_b32 s40, s5
+; GFX6-NEXT: s_bfe_i64 s[46:47], s[40:41], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[40:41], s[42:43], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v6, s46
+; GFX6-NEXT: v_mov_b32_e32 v7, s47
+; GFX6-NEXT: s_lshr_b32 s42, s4, 15
+; GFX6-NEXT: v_mov_b32_e32 v2, s40
+; GFX6-NEXT: v_mov_b32_e32 v3, s41
+; GFX6-NEXT: s_lshr_b32 s40, s4, 12
+; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v8, s36
-; GFX6-NEXT: v_mov_b32_e32 v9, s37
-; GFX6-NEXT: s_lshr_b32 s36, s4, 13
-; GFX6-NEXT: v_mov_b32_e32 v10, s38
-; GFX6-NEXT: v_mov_b32_e32 v11, s39
-; GFX6-NEXT: s_lshr_b32 s38, s4, 10
+; GFX6-NEXT: v_mov_b32_e32 v8, s34
+; GFX6-NEXT: v_mov_b32_e32 v9, s35
+; GFX6-NEXT: s_lshr_b32 s34, s4, 13
+; GFX6-NEXT: v_mov_b32_e32 v10, s36
+; GFX6-NEXT: v_mov_b32_e32 v11, s37
+; GFX6-NEXT: s_lshr_b32 s36, s4, 10
+; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v12, s30
-; GFX6-NEXT: v_mov_b32_e32 v13, s31
-; GFX6-NEXT: s_lshr_b32 s30, s4, 11
-; GFX6-NEXT: v_mov_b32_e32 v14, s34
-; GFX6-NEXT: v_mov_b32_e32 v15, s35
-; GFX6-NEXT: s_lshr_b32 s34, s4, 8
-; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v12, s28
+; GFX6-NEXT: v_mov_b32_e32 v13, s29
+; GFX6-NEXT: s_lshr_b32 s28, s4, 11
+; GFX6-NEXT: v_mov_b32_e32 v14, s30
+; GFX6-NEXT: v_mov_b32_e32 v15, s31
+; GFX6-NEXT: s_lshr_b32 s30, s4, 8
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v5, s7
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:496
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s26
-; GFX6-NEXT: v_mov_b32_e32 v3, s27
-; GFX6-NEXT: s_lshr_b32 s26, s4, 9
-; GFX6-NEXT: v_mov_b32_e32 v4, s28
-; GFX6-NEXT: v_mov_b32_e32 v5, s29
-; GFX6-NEXT: s_lshr_b32 s28, s4, 6
-; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v2, s24
+; GFX6-NEXT: v_mov_b32_e32 v3, s25
+; GFX6-NEXT: s_lshr_b32 s24, s4, 9
+; GFX6-NEXT: v_mov_b32_e32 v4, s26
+; GFX6-NEXT: v_mov_b32_e32 v5, s27
+; GFX6-NEXT: s_lshr_b32 s26, s4, 6
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s22
-; GFX6-NEXT: v_mov_b32_e32 v9, s23
-; GFX6-NEXT: s_lshr_b32 s22, s4, 7
-; GFX6-NEXT: v_mov_b32_e32 v10, s24
-; GFX6-NEXT: v_mov_b32_e32 v11, s25
-; GFX6-NEXT: s_lshr_b32 s24, s4, 4
-; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v8, s20
+; GFX6-NEXT: v_mov_b32_e32 v9, s21
+; GFX6-NEXT: s_lshr_b32 s20, s4, 7
+; GFX6-NEXT: v_mov_b32_e32 v10, s22
+; GFX6-NEXT: v_mov_b32_e32 v11, s23
+; GFX6-NEXT: s_lshr_b32 s22, s4, 4
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v12, s18
-; GFX6-NEXT: v_mov_b32_e32 v13, s19
-; GFX6-NEXT: s_lshr_b32 s18, s4, 5
-; GFX6-NEXT: v_mov_b32_e32 v14, s20
-; GFX6-NEXT: v_mov_b32_e32 v15, s21
-; GFX6-NEXT: s_lshr_b32 s20, s4, 2
-; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v12, s16
+; GFX6-NEXT: v_mov_b32_e32 v13, s17
+; GFX6-NEXT: s_lshr_b32 s16, s4, 5
+; GFX6-NEXT: v_mov_b32_e32 v14, s18
+; GFX6-NEXT: v_mov_b32_e32 v15, s19
+; GFX6-NEXT: s_lshr_b32 s18, s4, 2
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:208
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s14
-; GFX6-NEXT: v_mov_b32_e32 v3, s15
-; GFX6-NEXT: s_lshr_b32 s14, s4, 3
-; GFX6-NEXT: v_mov_b32_e32 v4, s16
-; GFX6-NEXT: v_mov_b32_e32 v5, s17
-; GFX6-NEXT: s_lshr_b32 s16, s4, 1
-; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v2, s12
+; GFX6-NEXT: v_mov_b32_e32 v3, s13
+; GFX6-NEXT: s_lshr_b32 s12, s4, 3
+; GFX6-NEXT: v_mov_b32_e32 v4, s14
+; GFX6-NEXT: v_mov_b32_e32 v5, s15
+; GFX6-NEXT: s_lshr_b32 s14, s4, 1
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s10
-; GFX6-NEXT: v_mov_b32_e32 v9, s11
-; GFX6-NEXT: s_lshr_b32 s10, s5, 29
-; GFX6-NEXT: v_mov_b32_e32 v10, s12
-; GFX6-NEXT: v_mov_b32_e32 v11, s13
-; GFX6-NEXT: s_lshr_b32 s12, s5, 28
-; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v8, s8
+; GFX6-NEXT: v_mov_b32_e32 v9, s9
+; GFX6-NEXT: s_lshr_b32 s8, s5, 29
+; GFX6-NEXT: v_mov_b32_e32 v10, s10
+; GFX6-NEXT: v_mov_b32_e32 v11, s11
+; GFX6-NEXT: s_lshr_b32 s10, s5, 28
+; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s6
; GFX6-NEXT: v_mov_b32_e32 v13, s7
; GFX6-NEXT: s_lshr_b32 s6, s5, 26
-; GFX6-NEXT: v_mov_b32_e32 v14, s8
-; GFX6-NEXT: v_mov_b32_e32 v15, s9
-; GFX6-NEXT: s_lshr_b32 s8, s5, 27
+; GFX6-NEXT: v_mov_b32_e32 v14, s38
+; GFX6-NEXT: v_mov_b32_e32 v15, s39
+; GFX6-NEXT: s_lshr_b32 s38, s5, 27
+; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s40
-; GFX6-NEXT: v_mov_b32_e32 v3, s41
-; GFX6-NEXT: s_lshr_b32 s40, s5, 25
-; GFX6-NEXT: v_mov_b32_e32 v4, s44
-; GFX6-NEXT: v_mov_b32_e32 v5, s45
-; GFX6-NEXT: s_lshr_b32 s44, s5, 24
-; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v2, s44
+; GFX6-NEXT: v_mov_b32_e32 v3, s45
+; GFX6-NEXT: s_lshr_b32 s44, s5, 25
+; GFX6-NEXT: v_mov_b32_e32 v4, s42
+; GFX6-NEXT: v_mov_b32_e32 v5, s43
+; GFX6-NEXT: s_lshr_b32 s42, s5, 24
+; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s42
-; GFX6-NEXT: v_mov_b32_e32 v9, s43
-; GFX6-NEXT: s_lshr_b32 s42, s5, 22
-; GFX6-NEXT: v_mov_b32_e32 v10, s36
-; GFX6-NEXT: v_mov_b32_e32 v11, s37
-; GFX6-NEXT: s_lshr_b32 s36, s5, 23
-; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v8, s40
+; GFX6-NEXT: v_mov_b32_e32 v9, s41
+; GFX6-NEXT: s_lshr_b32 s40, s5, 22
+; GFX6-NEXT: v_mov_b32_e32 v10, s34
+; GFX6-NEXT: v_mov_b32_e32 v11, s35
+; GFX6-NEXT: s_lshr_b32 s34, s5, 23
+; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v12, s38
-; GFX6-NEXT: v_mov_b32_e32 v13, s39
-; GFX6-NEXT: s_lshr_b32 s38, s5, 20
-; GFX6-NEXT: v_mov_b32_e32 v14, s30
-; GFX6-NEXT: v_mov_b32_e32 v15, s31
+; GFX6-NEXT: v_mov_b32_e32 v12, s36
+; GFX6-NEXT: v_mov_b32_e32 v13, s37
+; GFX6-NEXT: s_lshr_b32 s36, s5, 20
+; GFX6-NEXT: v_mov_b32_e32 v14, s28
+; GFX6-NEXT: v_mov_b32_e32 v15, s29
; GFX6-NEXT: s_lshr_b32 s4, s5, 21
-; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[30:31], s[34:35], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[28:29], s[30:31], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s30
-; GFX6-NEXT: v_mov_b32_e32 v3, s31
-; GFX6-NEXT: s_lshr_b32 s30, s5, 18
-; GFX6-NEXT: v_mov_b32_e32 v4, s26
-; GFX6-NEXT: v_mov_b32_e32 v5, s27
-; GFX6-NEXT: s_lshr_b32 s26, s5, 19
-; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v2, s28
+; GFX6-NEXT: v_mov_b32_e32 v3, s29
+; GFX6-NEXT: s_lshr_b32 s28, s5, 18
+; GFX6-NEXT: v_mov_b32_e32 v4, s24
+; GFX6-NEXT: v_mov_b32_e32 v5, s25
+; GFX6-NEXT: s_lshr_b32 s24, s5, 19
+; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s28
-; GFX6-NEXT: v_mov_b32_e32 v9, s29
-; GFX6-NEXT: s_lshr_b32 s28, s5, 17
-; GFX6-NEXT: v_mov_b32_e32 v10, s22
-; GFX6-NEXT: v_mov_b32_e32 v11, s23
-; GFX6-NEXT: s_lshr_b32 s22, s5, 16
-; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v8, s26
+; GFX6-NEXT: v_mov_b32_e32 v9, s27
+; GFX6-NEXT: s_lshr_b32 s26, s5, 17
+; GFX6-NEXT: v_mov_b32_e32 v10, s20
+; GFX6-NEXT: v_mov_b32_e32 v11, s21
+; GFX6-NEXT: s_lshr_b32 s20, s5, 16
+; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v12, s24
-; GFX6-NEXT: v_mov_b32_e32 v13, s25
-; GFX6-NEXT: s_lshr_b32 s24, s5, 14
-; GFX6-NEXT: v_mov_b32_e32 v14, s18
-; GFX6-NEXT: v_mov_b32_e32 v15, s19
-; GFX6-NEXT: s_lshr_b32 s18, s5, 15
-; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v12, s22
+; GFX6-NEXT: v_mov_b32_e32 v13, s23
+; GFX6-NEXT: s_lshr_b32 s22, s5, 14
+; GFX6-NEXT: v_mov_b32_e32 v14, s16
+; GFX6-NEXT: v_mov_b32_e32 v15, s17
+; GFX6-NEXT: s_lshr_b32 s16, s5, 15
+; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
-; GFX6-NEXT: v_mov_b32_e32 v16, s20
-; GFX6-NEXT: v_mov_b32_e32 v17, s21
-; GFX6-NEXT: s_lshr_b32 s20, s5, 12
+; GFX6-NEXT: v_mov_b32_e32 v16, s18
+; GFX6-NEXT: v_mov_b32_e32 v17, s19
+; GFX6-NEXT: s_lshr_b32 s18, s5, 12
+; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX6-NEXT: v_mov_b32_e32 v18, s14
-; GFX6-NEXT: v_mov_b32_e32 v19, s15
-; GFX6-NEXT: s_lshr_b32 s14, s5, 13
+; GFX6-NEXT: v_mov_b32_e32 v18, s12
+; GFX6-NEXT: v_mov_b32_e32 v19, s13
+; GFX6-NEXT: s_lshr_b32 s12, s5, 13
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v2, s16
-; GFX6-NEXT: v_mov_b32_e32 v3, s17
-; GFX6-NEXT: s_lshr_b32 s16, s5, 10
-; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v2, s14
+; GFX6-NEXT: v_mov_b32_e32 v3, s15
+; GFX6-NEXT: s_lshr_b32 s14, s5, 10
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s12
-; GFX6-NEXT: v_mov_b32_e32 v9, s13
-; GFX6-NEXT: s_lshr_b32 s12, s5, 11
-; GFX6-NEXT: v_mov_b32_e32 v10, s10
-; GFX6-NEXT: v_mov_b32_e32 v11, s11
-; GFX6-NEXT: s_lshr_b32 s10, s5, 8
-; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v8, s10
+; GFX6-NEXT: v_mov_b32_e32 v9, s11
+; GFX6-NEXT: s_lshr_b32 s10, s5, 11
+; GFX6-NEXT: v_mov_b32_e32 v10, s8
+; GFX6-NEXT: v_mov_b32_e32 v11, s9
+; GFX6-NEXT: s_lshr_b32 s8, s5, 8
+; GFX6-NEXT: s_bfe_i64 s[30:31], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s6
; GFX6-NEXT: v_mov_b32_e32 v13, s7
; GFX6-NEXT: s_lshr_b32 s6, s5, 9
-; GFX6-NEXT: v_mov_b32_e32 v14, s8
-; GFX6-NEXT: v_mov_b32_e32 v15, s9
-; GFX6-NEXT: s_lshr_b32 s8, s5, 6
-; GFX6-NEXT: s_bfe_i64 s[34:35], s[44:45], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v14, s30
+; GFX6-NEXT: v_mov_b32_e32 v15, s31
+; GFX6-NEXT: s_lshr_b32 s30, s5, 6
+; GFX6-NEXT: s_bfe_i64 s[38:39], s[42:43], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[42:43], s[44:45], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v16, s34
-; GFX6-NEXT: v_mov_b32_e32 v17, s35
-; GFX6-NEXT: s_lshr_b32 s34, s5, 7
-; GFX6-NEXT: v_mov_b32_e32 v18, s40
-; GFX6-NEXT: v_mov_b32_e32 v19, s41
-; GFX6-NEXT: s_lshr_b32 s40, s5, 4
-; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v16, s38
+; GFX6-NEXT: v_mov_b32_e32 v17, s39
+; GFX6-NEXT: s_lshr_b32 s38, s5, 7
+; GFX6-NEXT: v_mov_b32_e32 v18, s42
+; GFX6-NEXT: v_mov_b32_e32 v19, s43
+; GFX6-NEXT: s_lshr_b32 s42, s5, 4
+; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s42
-; GFX6-NEXT: v_mov_b32_e32 v1, s43
-; GFX6-NEXT: s_lshr_b32 s42, s5, 5
-; GFX6-NEXT: v_mov_b32_e32 v2, s36
-; GFX6-NEXT: v_mov_b32_e32 v3, s37
-; GFX6-NEXT: s_lshr_b32 s36, s5, 2
-; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: v_mov_b32_e32 v0, s40
+; GFX6-NEXT: v_mov_b32_e32 v1, s41
+; GFX6-NEXT: s_lshr_b32 s40, s5, 5
+; GFX6-NEXT: v_mov_b32_e32 v2, s34
+; GFX6-NEXT: v_mov_b32_e32 v3, s35
+; GFX6-NEXT: s_lshr_b32 s34, s5, 2
+; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v8, s38
-; GFX6-NEXT: v_mov_b32_e32 v9, s39
-; GFX6-NEXT: s_lshr_b32 s38, s5, 3
+; GFX6-NEXT: v_mov_b32_e32 v8, s36
+; GFX6-NEXT: v_mov_b32_e32 v9, s37
+; GFX6-NEXT: s_lshr_b32 s36, s5, 3
; GFX6-NEXT: s_lshr_b32 s44, s5, 1
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:448
@@ -10099,58 +10105,58 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX6-NEXT: v_mov_b32_e32 v11, s5
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:416
; GFX6-NEXT: s_waitcnt expcnt(1)
-; GFX6-NEXT: v_mov_b32_e32 v0, s30
-; GFX6-NEXT: v_mov_b32_e32 v1, s31
+; GFX6-NEXT: v_mov_b32_e32 v0, s28
+; GFX6-NEXT: v_mov_b32_e32 v1, s29
+; GFX6-NEXT: v_mov_b32_e32 v2, s24
+; GFX6-NEXT: v_mov_b32_e32 v3, s25
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
+; GFX6-NEXT: s_waitcnt expcnt(0)
+; GFX6-NEXT: v_mov_b32_e32 v0, s20
+; GFX6-NEXT: v_mov_b32_e32 v1, s21
; GFX6-NEXT: v_mov_b32_e32 v2, s26
; GFX6-NEXT: v_mov_b32_e32 v3, s27
-; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s22
; GFX6-NEXT: v_mov_b32_e32 v1, s23
-; GFX6-NEXT: v_mov_b32_e32 v2, s28
-; GFX6-NEXT: v_mov_b32_e32 v3, s29
-; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
-; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s24
-; GFX6-NEXT: v_mov_b32_e32 v1, s25
-; GFX6-NEXT: v_mov_b32_e32 v2, s18
-; GFX6-NEXT: v_mov_b32_e32 v3, s19
+; GFX6-NEXT: v_mov_b32_e32 v2, s16
+; GFX6-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s20
-; GFX6-NEXT: v_mov_b32_e32 v1, s21
-; GFX6-NEXT: v_mov_b32_e32 v2, s14
-; GFX6-NEXT: v_mov_b32_e32 v3, s15
-; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
-; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s16
-; GFX6-NEXT: v_mov_b32_e32 v1, s17
+; GFX6-NEXT: v_mov_b32_e32 v0, s18
+; GFX6-NEXT: v_mov_b32_e32 v1, s19
; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: v_mov_b32_e32 v3, s13
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
+; GFX6-NEXT: s_waitcnt expcnt(0)
+; GFX6-NEXT: v_mov_b32_e32 v0, s14
+; GFX6-NEXT: v_mov_b32_e32 v1, s15
+; GFX6-NEXT: v_mov_b32_e32 v2, s10
+; GFX6-NEXT: v_mov_b32_e32 v3, s11
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s10
-; GFX6-NEXT: v_mov_b32_e32 v1, s11
+; GFX6-NEXT: v_mov_b32_e32 v0, s8
+; GFX6-NEXT: v_mov_b32_e32 v1, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: v_mov_b32_e32 v3, s7
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s8
-; GFX6-NEXT: v_mov_b32_e32 v1, s9
-; GFX6-NEXT: v_mov_b32_e32 v2, s34
-; GFX6-NEXT: v_mov_b32_e32 v3, s35
+; GFX6-NEXT: v_mov_b32_e32 v0, s30
+; GFX6-NEXT: v_mov_b32_e32 v1, s31
+; GFX6-NEXT: v_mov_b32_e32 v2, s38
+; GFX6-NEXT: v_mov_b32_e32 v3, s39
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s40
-; GFX6-NEXT: v_mov_b32_e32 v1, s41
-; GFX6-NEXT: v_mov_b32_e32 v2, s42
-; GFX6-NEXT: v_mov_b32_e32 v3, s43
+; GFX6-NEXT: v_mov_b32_e32 v0, s42
+; GFX6-NEXT: v_mov_b32_e32 v1, s43
+; GFX6-NEXT: v_mov_b32_e32 v2, s40
+; GFX6-NEXT: v_mov_b32_e32 v3, s41
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v0, s36
-; GFX6-NEXT: v_mov_b32_e32 v1, s37
-; GFX6-NEXT: v_mov_b32_e32 v2, s38
-; GFX6-NEXT: v_mov_b32_e32 v3, s39
+; GFX6-NEXT: v_mov_b32_e32 v0, s34
+; GFX6-NEXT: v_mov_b32_e32 v1, s35
+; GFX6-NEXT: v_mov_b32_e32 v2, s36
+; GFX6-NEXT: v_mov_b32_e32 v3, s37
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
; GFX6-NEXT: v_mov_b32_e32 v8, s44
; GFX6-NEXT: v_mov_b32_e32 v9, s45
@@ -10161,44 +10167,45 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX8-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane
+; GFX8-NEXT: ; implicit-def: $sgpr22_sgpr23
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[10:11], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s0, s3, 8
; GFX8-NEXT: s_lshr_b32 s48, s3, 15
; GFX8-NEXT: v_writelane_b32 v62, s0, 0
-; GFX8-NEXT: s_lshr_b32 s74, s3, 30
-; GFX8-NEXT: s_lshr_b32 s30, s3, 31
-; GFX8-NEXT: s_lshr_b32 s72, s3, 28
-; GFX8-NEXT: s_lshr_b32 s34, s3, 29
-; GFX8-NEXT: s_lshr_b32 s70, s3, 26
+; GFX8-NEXT: s_lshr_b32 s64, s3, 30
+; GFX8-NEXT: s_lshr_b32 s24, s3, 31
+; GFX8-NEXT: s_lshr_b32 s68, s3, 28
+; GFX8-NEXT: s_lshr_b32 s30, s3, 29
+; GFX8-NEXT: s_lshr_b32 s72, s3, 26
; GFX8-NEXT: s_lshr_b32 s36, s3, 27
-; GFX8-NEXT: s_lshr_b32 s68, s3, 24
+; GFX8-NEXT: s_lshr_b32 s74, s3, 24
; GFX8-NEXT: s_lshr_b32 s38, s3, 25
-; GFX8-NEXT: s_lshr_b32 s64, s3, 22
+; GFX8-NEXT: s_lshr_b32 s70, s3, 22
; GFX8-NEXT: s_lshr_b32 s40, s3, 23
-; GFX8-NEXT: s_lshr_b32 s60, s3, 20
+; GFX8-NEXT: s_lshr_b32 s66, s3, 20
; GFX8-NEXT: s_lshr_b32 s42, s3, 21
-; GFX8-NEXT: s_lshr_b32 s66, s3, 18
+; GFX8-NEXT: s_lshr_b32 s62, s3, 18
; GFX8-NEXT: s_lshr_b32 s44, s3, 19
; GFX8-NEXT: s_lshr_b32 s56, s3, 16
; GFX8-NEXT: s_lshr_b32 s46, s3, 17
; GFX8-NEXT: s_lshr_b32 s58, s3, 14
-; GFX8-NEXT: s_lshr_b32 s62, s3, 12
+; GFX8-NEXT: s_lshr_b32 s60, s3, 12
; GFX8-NEXT: s_lshr_b32 s54, s3, 10
; GFX8-NEXT: v_writelane_b32 v62, s1, 1
; GFX8-NEXT: s_lshr_b32 s0, s3, 9
; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX8-NEXT: s_lshr_b32 s52, s3, 11
; GFX8-NEXT: v_writelane_b32 v62, s0, 2
-; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
@@ -10208,8 +10215,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v34, s48
; GFX8-NEXT: s_lshr_b32 s48, s2, 1
; GFX8-NEXT: s_lshr_b32 s50, s3, 13
@@ -10222,30 +10229,30 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_lshr_b32 s18, s3, 3
; GFX8-NEXT: s_lshr_b32 s20, s3, 1
; GFX8-NEXT: s_mov_b32 s22, s3
-; GFX8-NEXT: s_lshr_b32 s24, s2, 30
-; GFX8-NEXT: s_lshr_b32 s26, s2, 31
-; GFX8-NEXT: s_lshr_b32 s28, s2, 28
-; GFX8-NEXT: v_mov_b32_e32 v4, s74
-; GFX8-NEXT: v_mov_b32_e32 v12, s72
-; GFX8-NEXT: v_mov_b32_e32 v0, s70
-; GFX8-NEXT: v_mov_b32_e32 v8, s68
-; GFX8-NEXT: v_mov_b32_e32 v16, s64
-; GFX8-NEXT: v_mov_b32_e32 v20, s60
-; GFX8-NEXT: v_mov_b32_e32 v24, s66
+; GFX8-NEXT: s_lshr_b32 s26, s2, 30
+; GFX8-NEXT: s_lshr_b32 s28, s2, 31
+; GFX8-NEXT: s_lshr_b32 s34, s2, 28
+; GFX8-NEXT: v_mov_b32_e32 v4, s64
+; GFX8-NEXT: v_mov_b32_e32 v12, s68
+; GFX8-NEXT: v_mov_b32_e32 v0, s72
+; GFX8-NEXT: v_mov_b32_e32 v8, s74
+; GFX8-NEXT: v_mov_b32_e32 v16, s70
+; GFX8-NEXT: v_mov_b32_e32 v20, s66
+; GFX8-NEXT: v_mov_b32_e32 v24, s62
; GFX8-NEXT: v_mov_b32_e32 v28, s56
; GFX8-NEXT: v_mov_b32_e32 v32, s58
-; GFX8-NEXT: v_mov_b32_e32 v36, s62
+; GFX8-NEXT: v_mov_b32_e32 v36, s60
; GFX8-NEXT: s_lshr_b32 s86, s2, 29
; GFX8-NEXT: v_mov_b32_e32 v40, s54
; GFX8-NEXT: s_lshr_b32 s84, s2, 26
; GFX8-NEXT: s_lshr_b32 s82, s2, 27
; GFX8-NEXT: s_bfe_i64 vcc, s[52:53], 0x10000
; GFX8-NEXT: s_lshr_b32 s80, s2, 24
-; GFX8-NEXT: v_mov_b32_e32 v6, s30
-; GFX8-NEXT: v_mov_b32_e32 v7, s31
+; GFX8-NEXT: v_mov_b32_e32 v6, s24
+; GFX8-NEXT: v_mov_b32_e32 v7, s25
; GFX8-NEXT: s_lshr_b32 s78, s2, 25
; GFX8-NEXT: s_lshr_b32 s76, s2, 22
-; GFX8-NEXT: v_mov_b32_e32 v14, s34
+; GFX8-NEXT: v_mov_b32_e32 v14, s30
; GFX8-NEXT: s_lshr_b32 s74, s2, 23
; GFX8-NEXT: s_lshr_b32 s72, s2, 20
; GFX8-NEXT: v_mov_b32_e32 v2, s36
@@ -10272,8 +10279,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_lshr_b32 s40, s2, 4
; GFX8-NEXT: s_lshr_b32 s38, s2, 5
; GFX8-NEXT: s_lshr_b32 s36, s2, 2
-; GFX8-NEXT: s_lshr_b32 s34, s2, 3
-; GFX8-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x10000
+; GFX8-NEXT: s_lshr_b32 s30, s2, 3
+; GFX8-NEXT: s_bfe_i64 s[24:25], s[2:3], 0x10000
; GFX8-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000
; GFX8-NEXT: v_writelane_b32 v62, s2, 4
; GFX8-NEXT: v_writelane_b32 v62, s3, 5
@@ -10287,26 +10294,26 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x10000
; GFX8-NEXT: v_readlane_b32 s2, v62, 0
; GFX8-NEXT: v_readlane_b32 s3, v62, 1
-; GFX8-NEXT: v_mov_b32_e32 v5, s75
-; GFX8-NEXT: v_mov_b32_e32 v13, s73
-; GFX8-NEXT: v_mov_b32_e32 v15, s35
-; GFX8-NEXT: v_mov_b32_e32 v1, s71
+; GFX8-NEXT: v_mov_b32_e32 v5, s65
+; GFX8-NEXT: v_mov_b32_e32 v13, s69
+; GFX8-NEXT: v_mov_b32_e32 v15, s31
+; GFX8-NEXT: v_mov_b32_e32 v1, s73
; GFX8-NEXT: v_mov_b32_e32 v3, s37
-; GFX8-NEXT: v_mov_b32_e32 v9, s69
+; GFX8-NEXT: v_mov_b32_e32 v9, s75
; GFX8-NEXT: v_mov_b32_e32 v11, s39
-; GFX8-NEXT: v_mov_b32_e32 v17, s65
+; GFX8-NEXT: v_mov_b32_e32 v17, s71
; GFX8-NEXT: v_mov_b32_e32 v19, s41
-; GFX8-NEXT: v_mov_b32_e32 v21, s61
+; GFX8-NEXT: v_mov_b32_e32 v21, s67
; GFX8-NEXT: v_mov_b32_e32 v23, s43
-; GFX8-NEXT: v_mov_b32_e32 v25, s67
+; GFX8-NEXT: v_mov_b32_e32 v25, s63
; GFX8-NEXT: v_mov_b32_e32 v27, s45
; GFX8-NEXT: v_mov_b32_e32 v29, s57
; GFX8-NEXT: v_mov_b32_e32 v31, s47
; GFX8-NEXT: v_mov_b32_e32 v33, s59
; GFX8-NEXT: v_mov_b32_e32 v35, s49
-; GFX8-NEXT: v_mov_b32_e32 v37, s63
+; GFX8-NEXT: v_mov_b32_e32 v37, s61
; GFX8-NEXT: v_mov_b32_e32 v41, s55
-; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
@@ -10332,9 +10339,9 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
; GFX8-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
; GFX8-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
+; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
@@ -10450,17 +10457,17 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xe0
-; GFX8-NEXT: v_mov_b32_e32 v0, s24
-; GFX8-NEXT: v_mov_b32_e32 v1, s25
-; GFX8-NEXT: v_mov_b32_e32 v2, s26
-; GFX8-NEXT: v_mov_b32_e32 v3, s27
+; GFX8-NEXT: v_mov_b32_e32 v0, s26
+; GFX8-NEXT: v_mov_b32_e32 v1, s27
+; GFX8-NEXT: v_mov_b32_e32 v2, s28
+; GFX8-NEXT: v_mov_b32_e32 v3, s29
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xd0
-; GFX8-NEXT: v_mov_b32_e32 v0, s28
-; GFX8-NEXT: v_mov_b32_e32 v1, s29
+; GFX8-NEXT: v_mov_b32_e32 v0, s34
+; GFX8-NEXT: v_mov_b32_e32 v1, s35
; GFX8-NEXT: v_mov_b32_e32 v2, s86
; GFX8-NEXT: v_mov_b32_e32 v3, s87
; GFX8-NEXT: s_addc_u32 s1, s9, 0
@@ -10576,15 +10583,15 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s36
; GFX8-NEXT: v_mov_b32_e32 v1, s37
-; GFX8-NEXT: v_mov_b32_e32 v2, s34
-; GFX8-NEXT: v_mov_b32_e32 v3, s35
+; GFX8-NEXT: v_mov_b32_e32 v2, s30
+; GFX8-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_readlane_b32 s0, v62, 4
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_readlane_b32 s1, v62, 5
; GFX8-NEXT: v_mov_b32_e32 v4, s8
-; GFX8-NEXT: v_mov_b32_e32 v0, s30
-; GFX8-NEXT: v_mov_b32_e32 v1, s31
+; GFX8-NEXT: v_mov_b32_e32 v0, s24
+; GFX8-NEXT: v_mov_b32_e32 v1, s25
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v5, s9
@@ -10962,74 +10969,75 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_load_b64 s[10:11], s[2:3], 0x0
+; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s96, s11, 30
-; GFX12-NEXT: s_lshr_b32 s98, s11, 31
-; GFX12-NEXT: s_lshr_b32 s92, s11, 28
-; GFX12-NEXT: s_lshr_b32 s94, s11, 29
-; GFX12-NEXT: s_lshr_b32 s78, s11, 26
-; GFX12-NEXT: s_lshr_b32 s88, s11, 27
+; GFX12-NEXT: s_lshr_b32 s96, s3, 30
+; GFX12-NEXT: s_lshr_b32 s98, s3, 31
+; GFX12-NEXT: s_lshr_b32 s92, s3, 28
+; GFX12-NEXT: s_lshr_b32 s94, s3, 29
+; GFX12-NEXT: s_lshr_b32 s88, s3, 26
+; GFX12-NEXT: s_lshr_b32 s90, s3, 27
; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
; GFX12-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000
-; GFX12-NEXT: s_lshr_b32 s66, s11, 24
-; GFX12-NEXT: s_lshr_b32 s74, s11, 25
+; GFX12-NEXT: s_lshr_b32 s80, s3, 24
+; GFX12-NEXT: s_lshr_b32 s86, s3, 25
; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
; GFX12-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s96
-; GFX12-NEXT: s_lshr_b32 s56, s11, 22
-; GFX12-NEXT: s_lshr_b32 s62, s11, 23
+; GFX12-NEXT: s_lshr_b32 s70, s3, 22
+; GFX12-NEXT: s_lshr_b32 s76, s3, 23
; GFX12-NEXT: v_dual_mov_b32 v2, s97 :: v_dual_mov_b32 v3, s100
; GFX12-NEXT: v_dual_mov_b32 v4, s101 :: v_dual_mov_b32 v5, s92
-; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX12-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
-; GFX12-NEXT: s_lshr_b32 s44, s11, 20
-; GFX12-NEXT: s_lshr_b32 s52, s11, 21
-; GFX12-NEXT: s_lshr_b32 s30, s11, 18
-; GFX12-NEXT: s_lshr_b32 s40, s11, 19
-; GFX12-NEXT: s_lshr_b32 s18, s11, 16
-; GFX12-NEXT: s_lshr_b32 s26, s11, 17
-; GFX12-NEXT: s_lshr_b32 s2, s11, 14
-; GFX12-NEXT: s_lshr_b32 s4, s11, 15
+; GFX12-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
+; GFX12-NEXT: s_lshr_b32 s60, s3, 20
+; GFX12-NEXT: s_lshr_b32 s66, s3, 21
+; GFX12-NEXT: s_lshr_b32 s50, s3, 18
+; GFX12-NEXT: s_lshr_b32 s56, s3, 19
+; GFX12-NEXT: s_lshr_b32 s40, s3, 16
+; GFX12-NEXT: s_lshr_b32 s48, s3, 17
+; GFX12-NEXT: s_lshr_b32 s6, s3, 14
+; GFX12-NEXT: s_lshr_b32 s8, s3, 15
; GFX12-NEXT: v_dual_mov_b32 v6, s93 :: v_dual_mov_b32 v7, s94
-; GFX12-NEXT: v_dual_mov_b32 v8, s95 :: v_dual_mov_b32 v9, s78
+; GFX12-NEXT: v_dual_mov_b32 v8, s95 :: v_dual_mov_b32 v9, s88
+; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
+; GFX12-NEXT: s_lshr_b32 s10, s3, 12
+; GFX12-NEXT: s_lshr_b32 s12, s3, 13
+; GFX12-NEXT: v_dual_mov_b32 v10, s89 :: v_dual_mov_b32 v11, s90
+; GFX12-NEXT: v_dual_mov_b32 v12, s91 :: v_dual_mov_b32 v13, s80
+; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
+; GFX12-NEXT: s_lshr_b32 s14, s3, 10
+; GFX12-NEXT: s_lshr_b32 s16, s3, 11
+; GFX12-NEXT: v_dual_mov_b32 v14, s81 :: v_dual_mov_b32 v15, s86
+; GFX12-NEXT: v_dual_mov_b32 v16, s87 :: v_dual_mov_b32 v17, s70
+; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
-; GFX12-NEXT: s_lshr_b32 s6, s11, 12
-; GFX12-NEXT: s_lshr_b32 s8, s11, 13
-; GFX12-NEXT: v_dual_mov_b32 v10, s79 :: v_dual_mov_b32 v11, s88
-; GFX12-NEXT: v_dual_mov_b32 v12, s89 :: v_dual_mov_b32 v13, s66
+; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
-; GFX12-NEXT: s_lshr_b32 s12, s11, 10
-; GFX12-NEXT: s_lshr_b32 s14, s11, 11
-; GFX12-NEXT: v_dual_mov_b32 v14, s67 :: v_dual_mov_b32 v15, s74
-; GFX12-NEXT: v_dual_mov_b32 v16, s75 :: v_dual_mov_b32 v17, s56
-; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
-; GFX12-NEXT: s_lshr_b32 s16, s11, 8
-; GFX12-NEXT: s_lshr_b32 s20, s11, 9
-; GFX12-NEXT: v_dual_mov_b32 v18, s57 :: v_dual_mov_b32 v19, s62
-; GFX12-NEXT: v_dual_mov_b32 v20, s63 :: v_dual_mov_b32 v21, s44
+; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX12-NEXT: s_lshr_b32 s22, s11, 6
-; GFX12-NEXT: s_lshr_b32 s24, s11, 7
-; GFX12-NEXT: v_dual_mov_b32 v22, s45 :: v_dual_mov_b32 v23, s52
-; GFX12-NEXT: v_dual_mov_b32 v24, s53 :: v_dual_mov_b32 v25, s30
-; GFX12-NEXT: v_dual_mov_b32 v26, s31 :: v_dual_mov_b32 v27, s40
-; GFX12-NEXT: v_dual_mov_b32 v28, s41 :: v_dual_mov_b32 v29, s18
-; GFX12-NEXT: v_dual_mov_b32 v30, s19 :: v_dual_mov_b32 v31, s26
-; GFX12-NEXT: v_mov_b32_e32 v32, s27
-; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX12-NEXT: s_lshr_b32 s18, s3, 8
+; GFX12-NEXT: s_lshr_b32 s20, s3, 9
+; GFX12-NEXT: v_dual_mov_b32 v18, s71 :: v_dual_mov_b32 v19, s76
+; GFX12-NEXT: v_dual_mov_b32 v20, s77 :: v_dual_mov_b32 v21, s60
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX12-NEXT: s_lshr_b32 s22, s3, 6
+; GFX12-NEXT: s_lshr_b32 s24, s3, 7
+; GFX12-NEXT: v_dual_mov_b32 v22, s61 :: v_dual_mov_b32 v23, s66
+; GFX12-NEXT: v_dual_mov_b32 v24, s67 :: v_dual_mov_b32 v25, s50
+; GFX12-NEXT: v_dual_mov_b32 v26, s51 :: v_dual_mov_b32 v27, s56
+; GFX12-NEXT: v_dual_mov_b32 v28, s57 :: v_dual_mov_b32 v29, s40
+; GFX12-NEXT: v_dual_mov_b32 v30, s41 :: v_dual_mov_b32 v31, s48
+; GFX12-NEXT: v_mov_b32_e32 v32, s49
+; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: s_clause 0x7
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:496
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:480
@@ -11039,43 +11047,43 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:416
; GFX12-NEXT: global_store_b128 v0, v[25:28], s[0:1] offset:400
; GFX12-NEXT: global_store_b128 v0, v[29:32], s[0:1] offset:384
-; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
-; GFX12-NEXT: v_dual_mov_b32 v3, s4 :: v_dual_mov_b32 v4, s5
-; GFX12-NEXT: v_mov_b32_e32 v5, s6
-; GFX12-NEXT: s_lshr_b32 s28, s11, 4
-; GFX12-NEXT: s_lshr_b32 s34, s11, 5
-; GFX12-NEXT: s_lshr_b32 s36, s11, 2
-; GFX12-NEXT: s_lshr_b32 s38, s11, 3
+; GFX12-NEXT: v_dual_mov_b32 v1, s6 :: v_dual_mov_b32 v2, s7
+; GFX12-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v4, s9
+; GFX12-NEXT: v_mov_b32_e32 v5, s10
+; GFX12-NEXT: s_lshr_b32 s26, s3, 4
+; GFX12-NEXT: s_lshr_b32 s28, s3, 5
+; GFX12-NEXT: s_lshr_b32 s30, s3, 2
+; GFX12-NEXT: s_lshr_b32 s34, s3, 3
; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v7, s8
-; GFX12-NEXT: v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v9, s12
-; GFX12-NEXT: s_lshr_b32 s42, s11, 1
-; GFX12-NEXT: s_mov_b32 s46, s11
+; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v6, s11 :: v_dual_mov_b32 v7, s12
+; GFX12-NEXT: v_dual_mov_b32 v8, s13 :: v_dual_mov_b32 v9, s14
+; GFX12-NEXT: s_lshr_b32 s36, s3, 1
+; GFX12-NEXT: s_mov_b32 s4, s3
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v11, s14
-; GFX12-NEXT: v_dual_mov_b32 v12, s15 :: v_dual_mov_b32 v13, s16
-; GFX12-NEXT: s_lshr_b32 s48, s10, 30
-; GFX12-NEXT: s_lshr_b32 s50, s10, 31
-; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v10, s15 :: v_dual_mov_b32 v11, s16
+; GFX12-NEXT: v_dual_mov_b32 v12, s17 :: v_dual_mov_b32 v13, s18
+; GFX12-NEXT: s_lshr_b32 s38, s2, 30
+; GFX12-NEXT: s_lshr_b32 s42, s2, 31
; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v15, s20
+; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v14, s19 :: v_dual_mov_b32 v15, s20
; GFX12-NEXT: v_dual_mov_b32 v16, s21 :: v_dual_mov_b32 v17, s22
-; GFX12-NEXT: s_lshr_b32 s54, s10, 28
-; GFX12-NEXT: s_lshr_b32 s58, s10, 29
-; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX12-NEXT: s_lshr_b32 s44, s2, 28
+; GFX12-NEXT: s_lshr_b32 s46, s2, 29
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v18, s23 :: v_dual_mov_b32 v19, s24
-; GFX12-NEXT: v_dual_mov_b32 v20, s25 :: v_dual_mov_b32 v21, s28
-; GFX12-NEXT: s_lshr_b32 s60, s10, 26
-; GFX12-NEXT: s_lshr_b32 s64, s10, 27
-; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s34
-; GFX12-NEXT: v_mov_b32_e32 v24, s35
+; GFX12-NEXT: v_dual_mov_b32 v20, s25 :: v_dual_mov_b32 v21, s26
+; GFX12-NEXT: s_lshr_b32 s52, s2, 26
+; GFX12-NEXT: s_lshr_b32 s54, s2, 27
+; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v22, s27 :: v_dual_mov_b32 v23, s28
+; GFX12-NEXT: v_mov_b32_e32 v24, s29
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:368
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:352
@@ -11083,50 +11091,50 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:320
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:304
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:288
-; GFX12-NEXT: v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v2, s37
-; GFX12-NEXT: v_dual_mov_b32 v3, s38 :: v_dual_mov_b32 v4, s39
-; GFX12-NEXT: v_mov_b32_e32 v5, s46
-; GFX12-NEXT: s_lshr_b32 s68, s10, 24
-; GFX12-NEXT: s_lshr_b32 s70, s10, 25
-; GFX12-NEXT: s_lshr_b32 s72, s10, 22
-; GFX12-NEXT: s_lshr_b32 s76, s10, 23
-; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v1, s30 :: v_dual_mov_b32 v2, s31
+; GFX12-NEXT: v_dual_mov_b32 v3, s34 :: v_dual_mov_b32 v4, s35
+; GFX12-NEXT: v_mov_b32_e32 v5, s4
+; GFX12-NEXT: s_lshr_b32 s58, s2, 24
+; GFX12-NEXT: s_lshr_b32 s62, s2, 25
+; GFX12-NEXT: s_lshr_b32 s64, s2, 22
+; GFX12-NEXT: s_lshr_b32 s68, s2, 23
+; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v7, s36
+; GFX12-NEXT: v_dual_mov_b32 v8, s37 :: v_dual_mov_b32 v9, s38
+; GFX12-NEXT: s_lshr_b32 s72, s2, 20
+; GFX12-NEXT: s_lshr_b32 s74, s2, 21
; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v6, s47 :: v_dual_mov_b32 v7, s42
-; GFX12-NEXT: v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v9, s48
-; GFX12-NEXT: s_lshr_b32 s80, s10, 20
-; GFX12-NEXT: s_lshr_b32 s82, s10, 21
+; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v10, s39 :: v_dual_mov_b32 v11, s42
+; GFX12-NEXT: v_dual_mov_b32 v12, s43 :: v_dual_mov_b32 v13, s44
+; GFX12-NEXT: s_lshr_b32 s78, s2, 18
+; GFX12-NEXT: s_lshr_b32 s82, s2, 19
+; GFX12-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v10, s49 :: v_dual_mov_b32 v11, s50
-; GFX12-NEXT: v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v13, s54
-; GFX12-NEXT: s_lshr_b32 s84, s10, 18
-; GFX12-NEXT: s_lshr_b32 s86, s10, 19
-; GFX12-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v14, s45 :: v_dual_mov_b32 v15, s46
+; GFX12-NEXT: v_dual_mov_b32 v16, s47 :: v_dual_mov_b32 v17, s52
+; GFX12-NEXT: s_lshr_b32 s84, s2, 16
+; GFX12-NEXT: s_lshr_b32 s98, s2, 17
+; GFX12-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
; GFX12-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v14, s55 :: v_dual_mov_b32 v15, s58
-; GFX12-NEXT: v_dual_mov_b32 v16, s59 :: v_dual_mov_b32 v17, s60
-; GFX12-NEXT: s_lshr_b32 s90, s10, 16
-; GFX12-NEXT: s_lshr_b32 s98, s10, 17
+; GFX12-NEXT: v_dual_mov_b32 v18, s53 :: v_dual_mov_b32 v19, s54
+; GFX12-NEXT: v_dual_mov_b32 v20, s55 :: v_dual_mov_b32 v21, s58
+; GFX12-NEXT: s_lshr_b32 s96, s2, 14
+; GFX12-NEXT: s_lshr_b32 s100, s2, 15
+; GFX12-NEXT: s_lshr_b32 s94, s2, 13
+; GFX12-NEXT: s_lshr_b32 s90, s2, 11
+; GFX12-NEXT: s_lshr_b32 s86, s2, 9
+; GFX12-NEXT: s_lshr_b32 s76, s2, 7
+; GFX12-NEXT: s_lshr_b32 s66, s2, 5
+; GFX12-NEXT: s_lshr_b32 s56, s2, 3
+; GFX12-NEXT: s_lshr_b32 s48, s2, 1
; GFX12-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v18, s61 :: v_dual_mov_b32 v19, s64
-; GFX12-NEXT: v_dual_mov_b32 v20, s65 :: v_dual_mov_b32 v21, s68
-; GFX12-NEXT: s_lshr_b32 s96, s10, 14
-; GFX12-NEXT: s_lshr_b32 s100, s10, 15
-; GFX12-NEXT: s_lshr_b32 s94, s10, 13
-; GFX12-NEXT: s_lshr_b32 s88, s10, 11
-; GFX12-NEXT: s_lshr_b32 s74, s10, 9
-; GFX12-NEXT: s_lshr_b32 s62, s10, 7
-; GFX12-NEXT: s_lshr_b32 s52, s10, 5
-; GFX12-NEXT: s_lshr_b32 s40, s10, 3
-; GFX12-NEXT: s_lshr_b32 s26, s10, 1
-; GFX12-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v22, s69 :: v_dual_mov_b32 v23, s70
-; GFX12-NEXT: v_mov_b32_e32 v24, s71
+; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v22, s59 :: v_dual_mov_b32 v23, s62
+; GFX12-NEXT: v_mov_b32_e32 v24, s63
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:272
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:256
@@ -11134,43 +11142,43 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:192
-; GFX12-NEXT: v_dual_mov_b32 v1, s72 :: v_dual_mov_b32 v2, s73
-; GFX12-NEXT: v_dual_mov_b32 v3, s76 :: v_dual_mov_b32 v4, s77
-; GFX12-NEXT: v_mov_b32_e32 v5, s80
-; GFX12-NEXT: s_lshr_b32 s92, s10, 12
-; GFX12-NEXT: s_lshr_b32 s78, s10, 10
+; GFX12-NEXT: v_dual_mov_b32 v1, s64 :: v_dual_mov_b32 v2, s65
+; GFX12-NEXT: v_dual_mov_b32 v3, s68 :: v_dual_mov_b32 v4, s69
+; GFX12-NEXT: v_mov_b32_e32 v5, s72
+; GFX12-NEXT: s_lshr_b32 s92, s2, 12
+; GFX12-NEXT: s_lshr_b32 s88, s2, 10
; GFX12-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v6, s81 :: v_dual_mov_b32 v7, s82
-; GFX12-NEXT: v_dual_mov_b32 v8, s83 :: v_dual_mov_b32 v9, s84
-; GFX12-NEXT: s_lshr_b32 s66, s10, 8
-; GFX12-NEXT: s_lshr_b32 s56, s10, 6
-; GFX12-NEXT: s_lshr_b32 s44, s10, 4
-; GFX12-NEXT: s_lshr_b32 s30, s10, 2
-; GFX12-NEXT: s_bfe_i64 s[18:19], s[10:11], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v6, s73 :: v_dual_mov_b32 v7, s74
+; GFX12-NEXT: v_dual_mov_b32 v8, s75 :: v_dual_mov_b32 v9, s78
+; GFX12-NEXT: s_lshr_b32 s80, s2, 8
+; GFX12-NEXT: s_lshr_b32 s70, s2, 6
+; GFX12-NEXT: s_lshr_b32 s60, s2, 4
+; GFX12-NEXT: s_lshr_b32 s50, s2, 2
+; GFX12-NEXT: s_bfe_i64 s[40:41], s[2:3], 0x10000
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000
-; GFX12-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[48:49], s[56:57], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[56:57], s[66:67], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[66:67], s[76:77], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[76:77], s[86:87], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[86:87], s[90:91], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[90:91], s[94:95], 0x10000
; GFX12-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000
; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v10, s85 :: v_dual_mov_b32 v11, s86
-; GFX12-NEXT: v_dual_mov_b32 v12, s87 :: v_dual_mov_b32 v13, s90
-; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v10, s79 :: v_dual_mov_b32 v11, s82
+; GFX12-NEXT: v_dual_mov_b32 v12, s83 :: v_dual_mov_b32 v13, s84
+; GFX12-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v14, s91 :: v_dual_mov_b32 v15, s98
+; GFX12-NEXT: v_dual_mov_b32 v14, s85 :: v_dual_mov_b32 v15, s98
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v16, s99 :: v_dual_mov_b32 v17, s96
-; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v18, s97 :: v_dual_mov_b32 v19, s94
; GFX12-NEXT: v_dual_mov_b32 v20, s95 :: v_dual_mov_b32 v21, s92
-; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v22, s93 :: v_dual_mov_b32 v23, s88
-; GFX12-NEXT: v_mov_b32_e32 v24, s89
+; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v22, s93 :: v_dual_mov_b32 v23, s90
+; GFX12-NEXT: v_mov_b32_e32 v24, s91
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:160
@@ -11178,23 +11186,23 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:96
-; GFX12-NEXT: v_dual_mov_b32 v1, s78 :: v_dual_mov_b32 v2, s79
-; GFX12-NEXT: v_dual_mov_b32 v3, s74 :: v_dual_mov_b32 v4, s75
+; GFX12-NEXT: v_dual_mov_b32 v1, s88 :: v_dual_mov_b32 v2, s89
+; GFX12-NEXT: v_dual_mov_b32 v3, s86 :: v_dual_mov_b32 v4, s87
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_mov_b32_e32 v5, s66
-; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v6, s67 :: v_dual_mov_b32 v7, s62
-; GFX12-NEXT: v_dual_mov_b32 v8, s63 :: v_dual_mov_b32 v9, s56
-; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX12-NEXT: v_dual_mov_b32 v10, s57 :: v_dual_mov_b32 v11, s52
+; GFX12-NEXT: v_mov_b32_e32 v5, s80
+; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v6, s81 :: v_dual_mov_b32 v7, s76
+; GFX12-NEXT: v_dual_mov_b32 v8, s77 :: v_dual_mov_b32 v9, s70
+; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
+; GFX12-NEXT: v_dual_mov_b32 v10, s71 :: v_dual_mov_b32 v11, s66
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v13, s44
-; GFX12-NEXT: v_dual_mov_b32 v14, s45 :: v_dual_mov_b32 v15, s40
-; GFX12-NEXT: v_dual_mov_b32 v16, s41 :: v_dual_mov_b32 v17, s30
-; GFX12-NEXT: v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v19, s26
-; GFX12-NEXT: v_dual_mov_b32 v20, s27 :: v_dual_mov_b32 v21, s18
-; GFX12-NEXT: v_dual_mov_b32 v22, s19 :: v_dual_mov_b32 v23, s10
-; GFX12-NEXT: v_mov_b32_e32 v24, s11
+; GFX12-NEXT: v_dual_mov_b32 v12, s67 :: v_dual_mov_b32 v13, s60
+; GFX12-NEXT: v_dual_mov_b32 v14, s61 :: v_dual_mov_b32 v15, s56
+; GFX12-NEXT: v_dual_mov_b32 v16, s57 :: v_dual_mov_b32 v17, s50
+; GFX12-NEXT: v_dual_mov_b32 v18, s51 :: v_dual_mov_b32 v19, s48
+; GFX12-NEXT: v_dual_mov_b32 v20, s49 :: v_dual_mov_b32 v21, s40
+; GFX12-NEXT: v_dual_mov_b32 v22, s41 :: v_dual_mov_b32 v23, s2
+; GFX12-NEXT: v_mov_b32_e32 v24, s3
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:64
@@ -11207,74 +11215,75 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_load_b64 s[10:11], s[2:3], 0x0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_lshr_b32 s96, s11, 30
-; GFX1250-NEXT: s_lshr_b32 s98, s11, 31
-; GFX1250-NEXT: s_lshr_b32 s92, s11, 28
-; GFX1250-NEXT: s_lshr_b32 s94, s11, 29
-; GFX1250-NEXT: s_lshr_b32 s78, s11, 26
-; GFX1250-NEXT: s_lshr_b32 s88, s11, 27
+; GFX1250-NEXT: s_lshr_b32 s96, s3, 30
+; GFX1250-NEXT: s_lshr_b32 s98, s3, 31
+; GFX1250-NEXT: s_lshr_b32 s92, s3, 28
+; GFX1250-NEXT: s_lshr_b32 s94, s3, 29
+; GFX1250-NEXT: s_lshr_b32 s88, s3, 26
+; GFX1250-NEXT: s_lshr_b32 s90, s3, 27
; GFX1250-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s66, s11, 24
-; GFX1250-NEXT: s_lshr_b32 s74, s11, 25
+; GFX1250-NEXT: s_lshr_b32 s80, s3, 24
+; GFX1250-NEXT: s_lshr_b32 s86, s3, 25
; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s96
-; GFX1250-NEXT: s_lshr_b32 s56, s11, 22
-; GFX1250-NEXT: s_lshr_b32 s62, s11, 23
+; GFX1250-NEXT: s_lshr_b32 s70, s3, 22
+; GFX1250-NEXT: s_lshr_b32 s76, s3, 23
; GFX1250-NEXT: v_dual_mov_b32 v1, s97 :: v_dual_mov_b32 v2, s100
; GFX1250-NEXT: v_dual_mov_b32 v3, s101 :: v_dual_mov_b32 v4, s92
-; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s44, s11, 20
-; GFX1250-NEXT: s_lshr_b32 s52, s11, 21
-; GFX1250-NEXT: s_lshr_b32 s30, s11, 18
-; GFX1250-NEXT: s_lshr_b32 s40, s11, 19
-; GFX1250-NEXT: s_lshr_b32 s18, s11, 16
-; GFX1250-NEXT: s_lshr_b32 s26, s11, 17
-; GFX1250-NEXT: s_lshr_b32 s2, s11, 14
-; GFX1250-NEXT: s_lshr_b32 s4, s11, 15
+; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s60, s3, 20
+; GFX1250-NEXT: s_lshr_b32 s66, s3, 21
+; GFX1250-NEXT: s_lshr_b32 s50, s3, 18
+; GFX1250-NEXT: s_lshr_b32 s56, s3, 19
+; GFX1250-NEXT: s_lshr_b32 s40, s3, 16
+; GFX1250-NEXT: s_lshr_b32 s48, s3, 17
+; GFX1250-NEXT: s_lshr_b32 s6, s3, 14
+; GFX1250-NEXT: s_lshr_b32 s8, s3, 15
; GFX1250-NEXT: v_dual_mov_b32 v5, s93 :: v_dual_mov_b32 v6, s94
-; GFX1250-NEXT: v_dual_mov_b32 v7, s95 :: v_dual_mov_b32 v10, s78
+; GFX1250-NEXT: v_dual_mov_b32 v7, s95 :: v_dual_mov_b32 v10, s88
+; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s10, s3, 12
+; GFX1250-NEXT: s_lshr_b32 s12, s3, 13
+; GFX1250-NEXT: v_dual_mov_b32 v11, s89 :: v_dual_mov_b32 v12, s90
+; GFX1250-NEXT: v_dual_mov_b32 v13, s91 :: v_dual_mov_b32 v14, s80
+; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s14, s3, 10
+; GFX1250-NEXT: s_lshr_b32 s16, s3, 11
+; GFX1250-NEXT: v_dual_mov_b32 v15, s81 :: v_dual_mov_b32 v16, s86
+; GFX1250-NEXT: v_dual_mov_b32 v17, s87 :: v_dual_mov_b32 v18, s70
+; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s6, s11, 12
-; GFX1250-NEXT: s_lshr_b32 s8, s11, 13
-; GFX1250-NEXT: v_dual_mov_b32 v11, s79 :: v_dual_mov_b32 v12, s88
-; GFX1250-NEXT: v_dual_mov_b32 v13, s89 :: v_dual_mov_b32 v14, s66
+; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s12, s11, 10
-; GFX1250-NEXT: s_lshr_b32 s14, s11, 11
-; GFX1250-NEXT: v_dual_mov_b32 v15, s67 :: v_dual_mov_b32 v16, s74
-; GFX1250-NEXT: v_dual_mov_b32 v17, s75 :: v_dual_mov_b32 v18, s56
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s16, s11, 8
-; GFX1250-NEXT: s_lshr_b32 s20, s11, 9
-; GFX1250-NEXT: v_dual_mov_b32 v19, s57 :: v_dual_mov_b32 v20, s62
-; GFX1250-NEXT: v_dual_mov_b32 v21, s63 :: v_dual_mov_b32 v22, s44
+; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s22, s11, 6
-; GFX1250-NEXT: s_lshr_b32 s24, s11, 7
-; GFX1250-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v24, s52
-; GFX1250-NEXT: v_dual_mov_b32 v25, s53 :: v_dual_mov_b32 v26, s30
-; GFX1250-NEXT: v_dual_mov_b32 v27, s31 :: v_dual_mov_b32 v28, s40
-; GFX1250-NEXT: v_dual_mov_b32 v29, s41 :: v_dual_mov_b32 v30, s18
-; GFX1250-NEXT: v_dual_mov_b32 v31, s19 :: v_dual_mov_b32 v32, s26
-; GFX1250-NEXT: v_mov_b32_e32 v33, s27
-; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s18, s3, 8
+; GFX1250-NEXT: s_lshr_b32 s20, s3, 9
+; GFX1250-NEXT: v_dual_mov_b32 v19, s71 :: v_dual_mov_b32 v20, s76
+; GFX1250-NEXT: v_dual_mov_b32 v21, s77 :: v_dual_mov_b32 v22, s60
; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s22, s3, 6
+; GFX1250-NEXT: s_lshr_b32 s24, s3, 7
+; GFX1250-NEXT: v_dual_mov_b32 v23, s61 :: v_dual_mov_b32 v24, s66
+; GFX1250-NEXT: v_dual_mov_b32 v25, s67 :: v_dual_mov_b32 v26, s50
+; GFX1250-NEXT: v_dual_mov_b32 v27, s51 :: v_dual_mov_b32 v28, s56
+; GFX1250-NEXT: v_dual_mov_b32 v29, s57 :: v_dual_mov_b32 v30, s40
+; GFX1250-NEXT: v_dual_mov_b32 v31, s41 :: v_dual_mov_b32 v32, s48
+; GFX1250-NEXT: v_mov_b32_e32 v33, s49
+; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX1250-NEXT: s_clause 0x7
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:496
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:480
@@ -11285,48 +11294,48 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v8, v[26:29], s[0:1] offset:400
; GFX1250-NEXT: global_store_b128 v8, v[30:33], s[0:1] offset:384
; GFX1250-NEXT: s_wait_xcnt 0x7
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
+; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
+; GFX1250-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s9
; GFX1250-NEXT: s_wait_xcnt 0x6
-; GFX1250-NEXT: v_mov_b32_e32 v4, s6
-; GFX1250-NEXT: s_lshr_b32 s28, s11, 4
-; GFX1250-NEXT: s_lshr_b32 s34, s11, 5
-; GFX1250-NEXT: s_lshr_b32 s36, s11, 2
-; GFX1250-NEXT: s_lshr_b32 s38, s11, 3
+; GFX1250-NEXT: v_mov_b32_e32 v4, s10
+; GFX1250-NEXT: s_lshr_b32 s26, s3, 4
+; GFX1250-NEXT: s_lshr_b32 s28, s3, 5
+; GFX1250-NEXT: s_lshr_b32 s30, s3, 2
+; GFX1250-NEXT: s_lshr_b32 s34, s3, 3
; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s7 :: v_dual_mov_b32 v6, s8
+; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v6, s12
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v10, s12
-; GFX1250-NEXT: s_lshr_b32 s42, s11, 1
-; GFX1250-NEXT: s_mov_b32 s46, s11
+; GFX1250-NEXT: v_dual_mov_b32 v7, s13 :: v_dual_mov_b32 v10, s14
+; GFX1250-NEXT: s_lshr_b32 s36, s3, 1
+; GFX1250-NEXT: s_mov_b32 s4, s3
; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v12, s14
+; GFX1250-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v12, s16
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v14, s16
-; GFX1250-NEXT: s_lshr_b32 s48, s10, 30
-; GFX1250-NEXT: s_lshr_b32 s50, s10, 31
-; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v13, s17 :: v_dual_mov_b32 v14, s18
+; GFX1250-NEXT: s_lshr_b32 s38, s2, 30
+; GFX1250-NEXT: s_lshr_b32 s42, s2, 31
; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s17 :: v_dual_mov_b32 v16, s20
+; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s19 :: v_dual_mov_b32 v16, s20
; GFX1250-NEXT: s_wait_xcnt 0x3
; GFX1250-NEXT: v_dual_mov_b32 v17, s21 :: v_dual_mov_b32 v18, s22
-; GFX1250-NEXT: s_lshr_b32 s54, s10, 28
-; GFX1250-NEXT: s_lshr_b32 s58, s10, 29
-; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s44, s2, 28
+; GFX1250-NEXT: s_lshr_b32 s46, s2, 29
+; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX1250-NEXT: v_dual_mov_b32 v19, s23 :: v_dual_mov_b32 v20, s24
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v21, s25 :: v_dual_mov_b32 v22, s28
-; GFX1250-NEXT: s_lshr_b32 s60, s10, 26
-; GFX1250-NEXT: s_lshr_b32 s64, s10, 27
-; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v24, s34
-; GFX1250-NEXT: v_mov_b32_e32 v25, s35
+; GFX1250-NEXT: v_dual_mov_b32 v21, s25 :: v_dual_mov_b32 v22, s26
+; GFX1250-NEXT: s_lshr_b32 s52, s2, 26
+; GFX1250-NEXT: s_lshr_b32 s54, s2, 27
+; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v23, s27 :: v_dual_mov_b32 v24, s28
+; GFX1250-NEXT: v_mov_b32_e32 v25, s29
; GFX1250-NEXT: s_clause 0x5
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:368
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:352
@@ -11335,55 +11344,55 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:304
; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:288
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v1, s37
-; GFX1250-NEXT: v_dual_mov_b32 v2, s38 :: v_dual_mov_b32 v3, s39
+; GFX1250-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v1, s31
+; GFX1250-NEXT: v_dual_mov_b32 v2, s34 :: v_dual_mov_b32 v3, s35
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s46
-; GFX1250-NEXT: s_lshr_b32 s68, s10, 24
-; GFX1250-NEXT: s_lshr_b32 s70, s10, 25
-; GFX1250-NEXT: s_lshr_b32 s72, s10, 22
-; GFX1250-NEXT: s_lshr_b32 s76, s10, 23
-; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s47 :: v_dual_mov_b32 v6, s42
+; GFX1250-NEXT: v_mov_b32_e32 v4, s4
+; GFX1250-NEXT: s_lshr_b32 s58, s2, 24
+; GFX1250-NEXT: s_lshr_b32 s62, s2, 25
+; GFX1250-NEXT: s_lshr_b32 s64, s2, 22
+; GFX1250-NEXT: s_lshr_b32 s68, s2, 23
+; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s36
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v10, s48
-; GFX1250-NEXT: s_lshr_b32 s80, s10, 20
-; GFX1250-NEXT: s_lshr_b32 s82, s10, 21
-; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s49 :: v_dual_mov_b32 v12, s50
+; GFX1250-NEXT: v_dual_mov_b32 v7, s37 :: v_dual_mov_b32 v10, s38
+; GFX1250-NEXT: s_lshr_b32 s72, s2, 20
+; GFX1250-NEXT: s_lshr_b32 s74, s2, 21
+; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v11, s39 :: v_dual_mov_b32 v12, s42
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s51 :: v_dual_mov_b32 v14, s54
-; GFX1250-NEXT: s_lshr_b32 s84, s10, 18
-; GFX1250-NEXT: s_lshr_b32 s86, s10, 19
-; GFX1250-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v13, s43 :: v_dual_mov_b32 v14, s44
+; GFX1250-NEXT: s_lshr_b32 s78, s2, 18
+; GFX1250-NEXT: s_lshr_b32 s82, s2, 19
; GFX1250-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s55 :: v_dual_mov_b32 v16, s58
+; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s45 :: v_dual_mov_b32 v16, s46
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v17, s59 :: v_dual_mov_b32 v18, s60
-; GFX1250-NEXT: s_lshr_b32 s90, s10, 16
-; GFX1250-NEXT: s_lshr_b32 s98, s10, 17
-; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v19, s61 :: v_dual_mov_b32 v20, s64
+; GFX1250-NEXT: v_dual_mov_b32 v17, s47 :: v_dual_mov_b32 v18, s52
+; GFX1250-NEXT: s_lshr_b32 s84, s2, 16
+; GFX1250-NEXT: s_lshr_b32 s98, s2, 17
+; GFX1250-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v19, s53 :: v_dual_mov_b32 v20, s54
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s65 :: v_dual_mov_b32 v22, s68
-; GFX1250-NEXT: s_lshr_b32 s96, s10, 14
-; GFX1250-NEXT: s_lshr_b32 s100, s10, 15
-; GFX1250-NEXT: s_lshr_b32 s94, s10, 13
-; GFX1250-NEXT: s_lshr_b32 s88, s10, 11
-; GFX1250-NEXT: s_lshr_b32 s74, s10, 9
-; GFX1250-NEXT: s_lshr_b32 s62, s10, 7
-; GFX1250-NEXT: s_lshr_b32 s52, s10, 5
-; GFX1250-NEXT: s_lshr_b32 s40, s10, 3
-; GFX1250-NEXT: s_lshr_b32 s26, s10, 1
-; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s69 :: v_dual_mov_b32 v24, s70
-; GFX1250-NEXT: v_mov_b32_e32 v25, s71
+; GFX1250-NEXT: v_dual_mov_b32 v21, s55 :: v_dual_mov_b32 v22, s58
+; GFX1250-NEXT: s_lshr_b32 s96, s2, 14
+; GFX1250-NEXT: s_lshr_b32 s100, s2, 15
+; GFX1250-NEXT: s_lshr_b32 s94, s2, 13
+; GFX1250-NEXT: s_lshr_b32 s90, s2, 11
+; GFX1250-NEXT: s_lshr_b32 s86, s2, 9
+; GFX1250-NEXT: s_lshr_b32 s76, s2, 7
+; GFX1250-NEXT: s_lshr_b32 s66, s2, 5
+; GFX1250-NEXT: s_lshr_b32 s56, s2, 3
+; GFX1250-NEXT: s_lshr_b32 s48, s2, 1
+; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v23, s59 :: v_dual_mov_b32 v24, s62
+; GFX1250-NEXT: v_mov_b32_e32 v25, s63
; GFX1250-NEXT: s_clause 0x5
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:272
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:256
@@ -11392,46 +11401,46 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:208
; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s72 :: v_dual_mov_b32 v1, s73
-; GFX1250-NEXT: v_dual_mov_b32 v2, s76 :: v_dual_mov_b32 v3, s77
+; GFX1250-NEXT: v_dual_mov_b32 v0, s64 :: v_dual_mov_b32 v1, s65
+; GFX1250-NEXT: v_dual_mov_b32 v2, s68 :: v_dual_mov_b32 v3, s69
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s80
-; GFX1250-NEXT: s_lshr_b32 s92, s10, 12
-; GFX1250-NEXT: s_lshr_b32 s78, s10, 10
+; GFX1250-NEXT: v_mov_b32_e32 v4, s72
+; GFX1250-NEXT: s_lshr_b32 s92, s2, 12
+; GFX1250-NEXT: s_lshr_b32 s88, s2, 10
; GFX1250-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s81 :: v_dual_mov_b32 v6, s82
+; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s73 :: v_dual_mov_b32 v6, s74
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s83 :: v_dual_mov_b32 v10, s84
-; GFX1250-NEXT: s_lshr_b32 s66, s10, 8
-; GFX1250-NEXT: s_lshr_b32 s56, s10, 6
-; GFX1250-NEXT: s_lshr_b32 s44, s10, 4
-; GFX1250-NEXT: s_lshr_b32 s30, s10, 2
-; GFX1250-NEXT: s_bfe_i64 s[18:19], s[10:11], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v7, s75 :: v_dual_mov_b32 v10, s78
+; GFX1250-NEXT: s_lshr_b32 s80, s2, 8
+; GFX1250-NEXT: s_lshr_b32 s70, s2, 6
+; GFX1250-NEXT: s_lshr_b32 s60, s2, 4
+; GFX1250-NEXT: s_lshr_b32 s50, s2, 2
+; GFX1250-NEXT: s_bfe_i64 s[40:41], s[2:3], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[48:49], s[56:57], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[56:57], s[66:67], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[66:67], s[76:77], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[76:77], s[86:87], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[86:87], s[90:91], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[90:91], s[94:95], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s85 :: v_dual_mov_b32 v12, s86
+; GFX1250-NEXT: v_dual_mov_b32 v11, s79 :: v_dual_mov_b32 v12, s82
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s87 :: v_dual_mov_b32 v14, s90
-; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v13, s83 :: v_dual_mov_b32 v14, s84
+; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s91 :: v_dual_mov_b32 v16, s98
+; GFX1250-NEXT: v_dual_mov_b32 v15, s85 :: v_dual_mov_b32 v16, s98
; GFX1250-NEXT: s_wait_xcnt 0x1
; GFX1250-NEXT: v_dual_mov_b32 v17, s99 :: v_dual_mov_b32 v18, s96
-; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
; GFX1250-NEXT: v_dual_mov_b32 v19, s97 :: v_dual_mov_b32 v20, s94
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v21, s95 :: v_dual_mov_b32 v22, s92
-; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s93 :: v_dual_mov_b32 v24, s88
-; GFX1250-NEXT: v_mov_b32_e32 v25, s89
+; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v23, s93 :: v_dual_mov_b32 v24, s90
+; GFX1250-NEXT: v_mov_b32_e32 v25, s91
; GFX1250-NEXT: s_clause 0x5
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:176
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:160
@@ -11440,26 +11449,26 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:112
; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s78 :: v_dual_mov_b32 v1, s79
-; GFX1250-NEXT: v_dual_mov_b32 v2, s74 :: v_dual_mov_b32 v3, s75
+; GFX1250-NEXT: v_dual_mov_b32 v0, s88 :: v_dual_mov_b32 v1, s89
+; GFX1250-NEXT: v_dual_mov_b32 v2, s86 :: v_dual_mov_b32 v3, s87
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s66
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s67 :: v_dual_mov_b32 v6, s62
+; GFX1250-NEXT: v_mov_b32_e32 v4, s80
+; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s81 :: v_dual_mov_b32 v6, s76
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v10, s56
-; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v12, s52
+; GFX1250-NEXT: v_dual_mov_b32 v7, s77 :: v_dual_mov_b32 v10, s70
+; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v11, s71 :: v_dual_mov_b32 v12, s66
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s53 :: v_dual_mov_b32 v14, s44
-; GFX1250-NEXT: v_dual_mov_b32 v15, s45 :: v_dual_mov_b32 v16, s40
+; GFX1250-NEXT: v_dual_mov_b32 v13, s67 :: v_dual_mov_b32 v14, s60
+; GFX1250-NEXT: v_dual_mov_b32 v15, s61 :: v_dual_mov_b32 v16, s56
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v18, s30
-; GFX1250-NEXT: v_dual_mov_b32 v19, s31 :: v_dual_mov_b32 v20, s26
+; GFX1250-NEXT: v_dual_mov_b32 v17, s57 :: v_dual_mov_b32 v18, s50
+; GFX1250-NEXT: v_dual_mov_b32 v19, s51 :: v_dual_mov_b32 v20, s48
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v22, s18
-; GFX1250-NEXT: v_dual_mov_b32 v23, s19 :: v_dual_mov_b32 v24, s10
-; GFX1250-NEXT: v_mov_b32_e32 v25, s11
+; GFX1250-NEXT: v_dual_mov_b32 v21, s49 :: v_dual_mov_b32 v22, s40
+; GFX1250-NEXT: v_dual_mov_b32 v23, s41 :: v_dual_mov_b32 v24, s2
+; GFX1250-NEXT: v_mov_b32_e32 v25, s3
; GFX1250-NEXT: s_clause 0x5
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:80
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:64
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index a135b43bad0fe..a56360fc8fcbc 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -6177,6 +6177,7 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5
@@ -6203,6 +6204,7 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %ou
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
+; GCN-HSA-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
@@ -6236,28 +6238,29 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s3
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s3
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s3, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
-; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s0, 16
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s5, s1, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s5
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
@@ -6298,19 +6301,20 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %ou
; GFX12-LABEL: constant_sextload_v4i16_to_v4i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s6, s3
+; GFX12-NEXT: s_mov_b32 s4, s3
; GFX12-NEXT: s_lshr_b32 s8, s3, 16
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000
; GFX12-NEXT: s_lshr_b32 s2, s2, 16
-; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5
-; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v5, s7
-; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s9
+; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s7
+; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v5, s5
+; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s9
; GFX12-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: s_clause 0x1
@@ -6542,26 +6546,28 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr10_sgpr11
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s6, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s5
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s5, 31
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s5, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[12:13], 0x100000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s7, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s20, s7, 16
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[10:11], 0x100000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s20, s7, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s21, s7, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[8:9], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[14:15], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[12:13], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
@@ -6586,22 +6592,24 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
+; GCN-HSA-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GCN-HSA-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s2, s7
-; GCN-HSA-NEXT: s_lshr_b32 s8, s6, 16
-; GCN-HSA-NEXT: s_mov_b32 s10, s5
+; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16
+; GCN-HSA-NEXT: s_mov_b32 s8, s5
; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16
; GCN-HSA-NEXT: s_ashr_i32 s13, s5, 16
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GCN-HSA-NEXT: s_ashr_i32 s6, s5, 31
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[12:13], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-HSA-NEXT: s_ashr_i32 s12, s7, 31
; GCN-HSA-NEXT: s_ashr_i32 s7, s7, 16
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
@@ -6621,13 +6629,13 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
@@ -6644,30 +6652,32 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s5, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, s5
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[8:9], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s7
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 48
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7
; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 32
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -6675,22 +6685,23 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
-; GCN-NOHSA-VI-NEXT: s_add_u32 s6, s0, 16
-; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s7, s1, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s6
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s7
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
+; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s0, 16
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s1, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2
+; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0
@@ -6750,31 +6761,33 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
; GFX12-LABEL: constant_sextload_v8i16_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX12-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s14, s7
+; GFX12-NEXT: s_mov_b32 s8, s7
; GFX12-NEXT: s_lshr_b32 s16, s7, 16
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[6:7], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000
; GFX12-NEXT: s_lshr_b32 s6, s6, 16
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
-; GFX12-NEXT: s_mov_b32 s8, s5
-; GFX12-NEXT: s_lshr_b32 s10, s5, 16
-; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000
+; GFX12-NEXT: s_mov_b32 s2, s5
+; GFX12-NEXT: s_lshr_b32 s12, s5, 16
+; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
; GFX12-NEXT: s_lshr_b32 s4, s4, 16
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13
-; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v9, s15
-; GFX12-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v11, s17
+; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s15
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v9, s9
+; GFX12-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v11, s17
; GFX12-NEXT: v_dual_mov_b32 v10, s16 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v5, s3
-; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v13, s9
-; GFX12-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s11
-; GFX12-NEXT: v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v7, s5
+; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v5, s11
+; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v13, s3
+; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v15, s13
+; GFX12-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v7, s5
; GFX12-NEXT: v_mov_b32_e32 v6, s4
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:48
@@ -7164,15 +7177,19 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr12_sgpr13
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr14_sgpr15
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr16_sgpr17
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr18_sgpr19
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s7
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s6, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s5
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s4, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, s3
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s2, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s24, s1
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s6, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s5
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s4, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s3
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s2, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s1
; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s0, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[0:1], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x100000
@@ -7180,40 +7197,40 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[6:7], 0x100000
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s21, s1, 31
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s1, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[0:1], s[24:25], 0x100000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s24, s3, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s25, s3, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[2:3], s[20:21], 0x100000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s20, s5, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s5, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[16:17], 0x100000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s7, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s7, 16
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[0:1], s[18:19], 0x100000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s25, s3, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s3, 16
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[2:3], s[16:17], 0x100000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s5, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s5, 16
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[14:15], 0x100000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s7, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s40, s7, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[12:13], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[26:27], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[24:25], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[22:23], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[20:21], 0x100000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s38
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s40
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s27
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s20
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s38
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s33
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[8:11], 0 offset:80
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s3
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s25
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s24
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s27
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s25
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[8:11], 0 offset:48
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30
@@ -7226,14 +7243,14 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s18
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s19
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:64
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s17
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:32
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s12
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s13
@@ -7244,21 +7261,25 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
-; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $sgpr12_sgpr13
+; GCN-HSA-NEXT: ; implicit-def: $sgpr14_sgpr15
+; GCN-HSA-NEXT: ; implicit-def: $sgpr16_sgpr17
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
+; GCN-HSA-NEXT: ; implicit-def: $sgpr10_sgpr11
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s10, s7
-; GCN-HSA-NEXT: s_lshr_b32 s12, s6, 16
-; GCN-HSA-NEXT: s_mov_b32 s14, s5
-; GCN-HSA-NEXT: s_lshr_b32 s16, s4, 16
+; GCN-HSA-NEXT: s_mov_b32 s12, s5
+; GCN-HSA-NEXT: s_mov_b32 s14, s3
+; GCN-HSA-NEXT: s_mov_b32 s16, s1
; GCN-HSA-NEXT: s_ashr_i32 s25, s1, 31
; GCN-HSA-NEXT: s_ashr_i32 s29, s3, 31
; GCN-HSA-NEXT: s_ashr_i32 s30, s3, 16
-; GCN-HSA-NEXT: s_mov_b32 s18, s3
-; GCN-HSA-NEXT: s_lshr_b32 s20, s2, 16
-; GCN-HSA-NEXT: s_mov_b32 s22, s1
+; GCN-HSA-NEXT: s_lshr_b32 s18, s6, 16
+; GCN-HSA-NEXT: s_lshr_b32 s20, s4, 16
+; GCN-HSA-NEXT: s_lshr_b32 s22, s2, 16
; GCN-HSA-NEXT: s_lshr_b32 s24, s0, 16
; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[2:3], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[10:11], 0x100000
@@ -7273,12 +7294,12 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[24:25], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[22:23], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[16:17], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[22:23], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
; GCN-HSA-NEXT: s_add_u32 s22, s8, 0x70
; GCN-HSA-NEXT: s_addc_u32 s23, s9, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
@@ -7292,14 +7313,14 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s34
; GCN-HSA-NEXT: s_add_u32 s6, s8, 0x50
-; GCN-HSA-NEXT: v_mov_b32_e32 v6, s12
-; GCN-HSA-NEXT: v_mov_b32_e32 v7, s13
+; GCN-HSA-NEXT: v_mov_b32_e32 v6, s18
+; GCN-HSA-NEXT: v_mov_b32_e32 v7, s19
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
; GCN-HSA-NEXT: s_addc_u32 s7, s9, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s33
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s31
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7
@@ -7310,15 +7331,15 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT: s_addc_u32 s5, s9, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
; GCN-HSA-NEXT: s_add_u32 s4, s8, 48
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_addc_u32 s5, s9, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s29
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
@@ -7328,8 +7349,8 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s17
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
; GCN-HSA-NEXT: s_add_u32 s4, s8, 16
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -7353,6 +7374,10 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr12_sgpr13
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr20_sgpr21
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr26_sgpr27
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr34_sgpr35
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
@@ -7364,25 +7389,25 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[12:13], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[18:19], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x100000
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, s3
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x100000
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s5
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s20, s3
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[6:7], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s7
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[4:5], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x100000
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s34, s5
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s5, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4
; GCN-NOHSA-VI-NEXT: s_add_u32 s4, s8, 0x50
@@ -7546,65 +7571,69 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
;
; GFX12-LABEL: constant_sextload_v16i16_to_v16i64:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GFX12-NEXT: ; implicit-def: $sgpr16_sgpr17
+; GFX12-NEXT: ; implicit-def: $sgpr14_sgpr15
+; GFX12-NEXT: ; implicit-def: $sgpr12_sgpr13
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_load_b256 s[4:11], s[2:3], 0x0
+; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s30, s9
-; GFX12-NEXT: s_lshr_b32 s34, s9, 16
-; GFX12-NEXT: s_bfe_i64 s[28:29], s[8:9], 0x100000
-; GFX12-NEXT: s_lshr_b32 s8, s8, 16
-; GFX12-NEXT: s_bfe_i64 s[22:23], s[10:11], 0x100000
-; GFX12-NEXT: s_mov_b32 s24, s11
-; GFX12-NEXT: s_lshr_b32 s26, s11, 16
-; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000
+; GFX12-NEXT: s_mov_b32 s18, s5
+; GFX12-NEXT: s_lshr_b32 s34, s5, 16
+; GFX12-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x100000
+; GFX12-NEXT: s_lshr_b32 s4, s4, 16
+; GFX12-NEXT: s_bfe_i64 s[26:27], s[6:7], 0x100000
+; GFX12-NEXT: s_mov_b32 s16, s7
+; GFX12-NEXT: s_lshr_b32 s28, s7, 16
+; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000
-; GFX12-NEXT: s_lshr_b32 s10, s10, 16
-; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s29
-; GFX12-NEXT: s_mov_b32 s18, s7
-; GFX12-NEXT: s_lshr_b32 s20, s7, 16
-; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v9, s31
-; GFX12-NEXT: v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s35
-; GFX12-NEXT: v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s9
-; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000
; GFX12-NEXT: s_lshr_b32 s6, s6, 16
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v5, s23
-; GFX12-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v13, s25
-; GFX12-NEXT: s_mov_b32 s12, s5
-; GFX12-NEXT: s_lshr_b32 s14, s5, 16
-; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v15, s27
-; GFX12-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v7, s11
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
-; GFX12-NEXT: s_lshr_b32 s4, s4, 16
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s31
+; GFX12-NEXT: s_mov_b32 s14, s3
+; GFX12-NEXT: s_lshr_b32 s24, s3, 16
+; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v4, s26 :: v_dual_mov_b32 v9, s19
+; GFX12-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v11, s35
+; GFX12-NEXT: v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s5
+; GFX12-NEXT: s_bfe_i64 s[22:23], s[2:3], 0x100000
+; GFX12-NEXT: s_lshr_b32 s2, s2, 16
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v17, s19
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v5, s27
+; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v13, s17
+; GFX12-NEXT: s_mov_b32 s12, s1
+; GFX12-NEXT: s_lshr_b32 s20, s1, 16
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s21
-; GFX12-NEXT: v_mov_b32_e32 v18, s20
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v15, s29
+; GFX12-NEXT: v_dual_mov_b32 v14, s28 :: v_dual_mov_b32 v7, s7
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000
+; GFX12-NEXT: s_lshr_b32 s0, s0, 16
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v17, s15
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v19, s25
+; GFX12-NEXT: v_mov_b32_e32 v18, s24
+; GFX12-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000
; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
-; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
-; GFX12-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v3, s7
-; GFX12-NEXT: v_dual_mov_b32 v1, s17 :: v_dual_mov_b32 v2, s6
+; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:80
+; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:64
+; GFX12-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v3, s3
+; GFX12-NEXT: v_dual_mov_b32 v1, s23 :: v_dual_mov_b32 v2, s2
; GFX12-NEXT: v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v8, s12
-; GFX12-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v10, s14
-; GFX12-NEXT: v_dual_mov_b32 v21, s3 :: v_dual_mov_b32 v20, s2
-; GFX12-NEXT: v_dual_mov_b32 v23, s5 :: v_dual_mov_b32 v22, s4
+; GFX12-NEXT: v_dual_mov_b32 v11, s21 :: v_dual_mov_b32 v10, s20
+; GFX12-NEXT: v_dual_mov_b32 v21, s11 :: v_dual_mov_b32 v20, s10
+; GFX12-NEXT: v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:112
-; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96
-; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:48
-; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:32
-; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:16
-; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
+; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:112
+; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:96
+; GFX12-NEXT: global_store_b128 v24, v[16:19], s[8:9] offset:48
+; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:32
+; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:16
+; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9]
; GFX12-NEXT: s_endpgm
%load = load <16 x i16>, ptr addrspace(4) %in
%ext = sext <16 x i16> %load to <16 x i64>
@@ -8309,11 +8338,23 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x9
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr20_sgpr21
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr48_sgpr49
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr50_sgpr51
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr46_sgpr47
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr36_sgpr37
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr42_sgpr43
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr44_sgpr45
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s15
; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, s13
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s22, s11
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s24, s9
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s48, s11
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s50, s9
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s46, s7
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s36, s5
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s42, s3
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s44, s1
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s1, 31
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s25, s1, 16
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s3, 31
@@ -8321,31 +8362,25 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s5, 31
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s5, 16
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s7, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s7, 16
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s9, 31
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[48:49], s[24:25], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[22:23], 0x100000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s7, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[52:53], s[20:21], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[54:55], s[18:19], 0x100000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s43, s9, 16
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s11, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s47, s11, 16
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s41, s13, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s13, 16
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s15, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s15, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s14, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s12, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s10, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s8, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s46, s7
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s6, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s44, s5
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s4, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s42, s3
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s2, 16
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s40, s1
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s0, 16
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s41, s9, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s9, 16
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s11, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s11, 16
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s13, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s60, s13, 16
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s15, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s62, s15, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s14, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s12, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s10, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s8, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s6, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s4, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s2, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s0, 16
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000
@@ -8364,99 +8399,101 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s53
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s2
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s3
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s50
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s51
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s48
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s49
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s58
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s57
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s56
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s41
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[46:47], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[42:43], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x100000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s47
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s45
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s43
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s39
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[48:49], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[50:51], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[46:47], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s12
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s13
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s61
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s59
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s58
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s57
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s10
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s14
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s12
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s13
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s37
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s35
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s11
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s33
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s31
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:144
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s56
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s41
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s39
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s35
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s17
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s29
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[38:39], 0x100000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s27
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s40
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s41
+; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s36
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s37
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s33
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s42
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s43
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s29
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s27
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s44
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s45
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s25
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s10
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s11
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s8
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s9
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[36:37], 0x100000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[34:35], 0x100000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s8
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s9
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[40:41], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[38:39], 0x100000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[34:35], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[30:31], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[28:29], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s6
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s7
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s4
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s5
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s6
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s7
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s4
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s5
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s20
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s21
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s24
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s25
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s18
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s19
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s22
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s23
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s17
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s14
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s15
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s8
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s9
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s26
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s27
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s17
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s14
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s15
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2)
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s12
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s13
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s10
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s11
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
@@ -8465,98 +8502,108 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[8:9], 0x0
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $sgpr24_sgpr25
+; GCN-HSA-NEXT: ; implicit-def: $sgpr34_sgpr35
+; GCN-HSA-NEXT: ; implicit-def: $sgpr36_sgpr37
+; GCN-HSA-NEXT: ; implicit-def: $sgpr30_sgpr31
+; GCN-HSA-NEXT: ; implicit-def: $sgpr28_sgpr29
+; GCN-HSA-NEXT: ; implicit-def: $sgpr38_sgpr39
+; GCN-HSA-NEXT: ; implicit-def: $sgpr48_sgpr49
+; GCN-HSA-NEXT: ; implicit-def: $sgpr50_sgpr51
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: s_mov_b32 s24, s15
-; GCN-HSA-NEXT: s_ashr_i32 s37, s3, 31
-; GCN-HSA-NEXT: s_ashr_i32 s38, s3, 16
-; GCN-HSA-NEXT: s_ashr_i32 s57, s11, 16
-; GCN-HSA-NEXT: s_ashr_i32 s59, s13, 31
-; GCN-HSA-NEXT: s_ashr_i32 s61, s13, 16
-; GCN-HSA-NEXT: s_ashr_i32 s63, s15, 31
-; GCN-HSA-NEXT: s_ashr_i32 s65, s15, 16
-; GCN-HSA-NEXT: s_lshr_b32 s46, s14, 16
-; GCN-HSA-NEXT: s_mov_b32 s48, s13
-; GCN-HSA-NEXT: s_lshr_b32 s50, s12, 16
-; GCN-HSA-NEXT: s_mov_b32 s52, s11
-; GCN-HSA-NEXT: s_lshr_b32 s34, s10, 16
+; GCN-HSA-NEXT: s_mov_b32 s34, s13
+; GCN-HSA-NEXT: s_mov_b32 s36, s11
; GCN-HSA-NEXT: s_mov_b32 s30, s9
-; GCN-HSA-NEXT: s_lshr_b32 s28, s8, 16
-; GCN-HSA-NEXT: s_mov_b32 s54, s7
-; GCN-HSA-NEXT: s_lshr_b32 s56, s6, 16
-; GCN-HSA-NEXT: s_mov_b32 s58, s5
-; GCN-HSA-NEXT: s_lshr_b32 s60, s4, 16
-; GCN-HSA-NEXT: s_mov_b32 s62, s3
+; GCN-HSA-NEXT: s_mov_b32 s28, s7
+; GCN-HSA-NEXT: s_mov_b32 s38, s5
+; GCN-HSA-NEXT: s_mov_b32 s48, s3
+; GCN-HSA-NEXT: s_mov_b32 s50, s1
+; GCN-HSA-NEXT: s_ashr_i32 s41, s3, 31
+; GCN-HSA-NEXT: s_ashr_i32 s42, s3, 16
+; GCN-HSA-NEXT: s_ashr_i32 s55, s9, 16
+; GCN-HSA-NEXT: s_ashr_i32 s57, s11, 31
+; GCN-HSA-NEXT: s_ashr_i32 s59, s11, 16
+; GCN-HSA-NEXT: s_ashr_i32 s61, s13, 31
+; GCN-HSA-NEXT: s_ashr_i32 s63, s13, 16
+; GCN-HSA-NEXT: s_ashr_i32 s65, s15, 31
+; GCN-HSA-NEXT: s_ashr_i32 s67, s15, 16
+; GCN-HSA-NEXT: s_lshr_b32 s52, s14, 16
+; GCN-HSA-NEXT: s_lshr_b32 s54, s12, 16
+; GCN-HSA-NEXT: s_lshr_b32 s56, s10, 16
+; GCN-HSA-NEXT: s_lshr_b32 s58, s8, 16
+; GCN-HSA-NEXT: s_lshr_b32 s60, s6, 16
+; GCN-HSA-NEXT: s_lshr_b32 s62, s4, 16
; GCN-HSA-NEXT: s_lshr_b32 s64, s2, 16
-; GCN-HSA-NEXT: s_mov_b32 s66, s1
-; GCN-HSA-NEXT: s_lshr_b32 s68, s0, 16
+; GCN-HSA-NEXT: s_lshr_b32 s66, s0, 16
; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[24:25], 0x100000
; GCN-HSA-NEXT: s_ashr_i32 s33, s1, 31
-; GCN-HSA-NEXT: s_ashr_i32 s36, s1, 16
-; GCN-HSA-NEXT: s_ashr_i32 s39, s5, 31
-; GCN-HSA-NEXT: s_ashr_i32 s40, s5, 16
-; GCN-HSA-NEXT: s_ashr_i32 s41, s7, 31
-; GCN-HSA-NEXT: s_ashr_i32 s42, s7, 16
-; GCN-HSA-NEXT: s_ashr_i32 s43, s9, 31
-; GCN-HSA-NEXT: s_ashr_i32 s44, s9, 16
-; GCN-HSA-NEXT: s_ashr_i32 s45, s11, 31
+; GCN-HSA-NEXT: s_ashr_i32 s40, s1, 16
+; GCN-HSA-NEXT: s_ashr_i32 s43, s5, 31
+; GCN-HSA-NEXT: s_ashr_i32 s44, s5, 16
+; GCN-HSA-NEXT: s_ashr_i32 s45, s7, 31
+; GCN-HSA-NEXT: s_ashr_i32 s46, s7, 16
+; GCN-HSA-NEXT: s_ashr_i32 s47, s9, 31
; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[4:5], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[6:7], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[70:71], s[10:11], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[72:73], s[12:13], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[74:75], s[14:15], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[68:69], s[10:11], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[70:71], s[12:13], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[72:73], s[14:15], 0x100000
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
-; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[66:67], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[50:51], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[64:65], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[62:63], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[60:61], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[58:59], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[56:57], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[54:55], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[48:49], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[38:39], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[60:61], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[28:29], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[58:59], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[38:39], s[56:57], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000
+; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[54:55], 0x100000
; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000
-; GCN-HSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000
-; GCN-HSA-NEXT: s_add_u32 s54, s16, 0xf0
-; GCN-HSA-NEXT: s_addc_u32 s55, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v6, s46
-; GCN-HSA-NEXT: s_add_u32 s46, s16, 0xe0
-; GCN-HSA-NEXT: v_mov_b32_e32 v7, s47
-; GCN-HSA-NEXT: s_addc_u32 s47, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v24, s46
-; GCN-HSA-NEXT: v_mov_b32_e32 v25, s47
-; GCN-HSA-NEXT: s_add_u32 s46, s16, 0xd0
-; GCN-HSA-NEXT: s_addc_u32 s47, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v26, s46
-; GCN-HSA-NEXT: v_mov_b32_e32 v27, s47
-; GCN-HSA-NEXT: s_add_u32 s46, s16, 0xc0
-; GCN-HSA-NEXT: s_addc_u32 s47, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v28, s46
-; GCN-HSA-NEXT: v_mov_b32_e32 v18, s54
-; GCN-HSA-NEXT: v_mov_b32_e32 v29, s47
-; GCN-HSA-NEXT: s_add_u32 s46, s16, 0xb0
-; GCN-HSA-NEXT: v_mov_b32_e32 v19, s55
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s65
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s63
-; GCN-HSA-NEXT: s_addc_u32 s47, s17, 0
+; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[52:53], 0x100000
+; GCN-HSA-NEXT: s_add_u32 s52, s16, 0xf0
+; GCN-HSA-NEXT: s_addc_u32 s53, s17, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v6, s50
+; GCN-HSA-NEXT: s_add_u32 s50, s16, 0xe0
+; GCN-HSA-NEXT: v_mov_b32_e32 v7, s51
+; GCN-HSA-NEXT: s_addc_u32 s51, s17, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v8, s34
+; GCN-HSA-NEXT: s_add_u32 s34, s16, 0xd0
+; GCN-HSA-NEXT: v_mov_b32_e32 v9, s35
+; GCN-HSA-NEXT: s_addc_u32 s35, s17, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v26, s34
+; GCN-HSA-NEXT: v_mov_b32_e32 v27, s35
+; GCN-HSA-NEXT: s_add_u32 s34, s16, 0xc0
+; GCN-HSA-NEXT: s_addc_u32 s35, s17, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v28, s34
+; GCN-HSA-NEXT: v_mov_b32_e32 v18, s52
+; GCN-HSA-NEXT: v_mov_b32_e32 v29, s35
+; GCN-HSA-NEXT: s_add_u32 s34, s16, 0xb0
+; GCN-HSA-NEXT: v_mov_b32_e32 v19, s53
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s67
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s65
+; GCN-HSA-NEXT: s_addc_u32 s35, s17, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v24, s50
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s74
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s72
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30
; GCN-HSA-NEXT: s_add_u32 s30, s16, 0xa0
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s75
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s73
+; GCN-HSA-NEXT: v_mov_b32_e32 v25, s51
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31
; GCN-HSA-NEXT: s_addc_u32 s31, s17, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
-; GCN-HSA-NEXT: v_mov_b32_e32 v8, s48
+; GCN-HSA-NEXT: v_mov_b32_e32 v10, s63
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s26
; GCN-HSA-NEXT: s_add_u32 s26, s16, 0x90
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s27
@@ -8564,38 +8611,36 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v24, s26
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s27
; GCN-HSA-NEXT: s_add_u32 s26, s16, 0x80
-; GCN-HSA-NEXT: v_mov_b32_e32 v9, s49
-; GCN-HSA-NEXT: v_mov_b32_e32 v10, s61
-; GCN-HSA-NEXT: v_mov_b32_e32 v11, s59
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s61
; GCN-HSA-NEXT: s_addc_u32 s27, s17, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s72
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s70
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s24
; GCN-HSA-NEXT: s_add_u32 s24, s16, 0x70
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s73
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s50
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s51
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s71
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s48
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s49
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s25
; GCN-HSA-NEXT: s_addc_u32 s25, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v30, s46
+; GCN-HSA-NEXT: v_mov_b32_e32 v30, s34
; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15]
-; GCN-HSA-NEXT: v_mov_b32_e32 v16, s52
+; GCN-HSA-NEXT: v_mov_b32_e32 v16, s36
; GCN-HSA-NEXT: v_mov_b32_e32 v14, s14
; GCN-HSA-NEXT: s_add_u32 s14, s16, 0x60
-; GCN-HSA-NEXT: v_mov_b32_e32 v17, s53
-; GCN-HSA-NEXT: v_mov_b32_e32 v31, s47
-; GCN-HSA-NEXT: v_mov_b32_e32 v18, s57
-; GCN-HSA-NEXT: v_mov_b32_e32 v19, s45
+; GCN-HSA-NEXT: v_mov_b32_e32 v17, s37
+; GCN-HSA-NEXT: v_mov_b32_e32 v31, s35
+; GCN-HSA-NEXT: v_mov_b32_e32 v18, s59
+; GCN-HSA-NEXT: v_mov_b32_e32 v19, s57
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s30
; GCN-HSA-NEXT: v_mov_b32_e32 v15, s15
; GCN-HSA-NEXT: s_addc_u32 s15, s17, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v20, s70
-; GCN-HSA-NEXT: v_mov_b32_e32 v21, s71
-; GCN-HSA-NEXT: v_mov_b32_e32 v22, s34
-; GCN-HSA-NEXT: v_mov_b32_e32 v23, s35
+; GCN-HSA-NEXT: v_mov_b32_e32 v20, s68
+; GCN-HSA-NEXT: v_mov_b32_e32 v21, s69
+; GCN-HSA-NEXT: v_mov_b32_e32 v22, s38
+; GCN-HSA-NEXT: v_mov_b32_e32 v23, s39
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s31
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s44
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s43
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s55
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s47
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s26
; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s27
@@ -8605,8 +8650,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s29
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[20:23]
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s22
-; GCN-HSA-NEXT: v_mov_b32_e32 v10, s42
-; GCN-HSA-NEXT: v_mov_b32_e32 v11, s41
+; GCN-HSA-NEXT: v_mov_b32_e32 v10, s46
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s45
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s23
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s25
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s14
@@ -8619,8 +8664,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13
; GCN-HSA-NEXT: s_addc_u32 s13, s17, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s12
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s40
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s44
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s43
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s13
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_nop 0
@@ -8639,8 +8684,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: s_addc_u32 s9, s17, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s37
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s42
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s41
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_nop 0
@@ -8659,7 +8704,7 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s40
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s33
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -8675,151 +8720,158 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x24
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr24_sgpr25
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr26_sgpr27
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr42_sgpr43
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr50_sgpr51
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr56_sgpr57
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr62_sgpr63
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr68_sgpr69
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr70_sgpr71
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s0, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s1
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s1, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s2, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s34, s3
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s3, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s0, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s24, s1
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s1, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s2, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s3
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s3, 16
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s4, 16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s5
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s5, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s6, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s7
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s7, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s8, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s54, s9
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s9, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s10, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s60, s11
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s11, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s6, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s50, s7
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s7, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s8, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s56, s9
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s9, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s10, 16
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s62, s11
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s11, 16
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s12, 16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s68, s13
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s70, s13, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s74, s14, 16
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s76, s15
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s70, s15
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s72, s13, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s76, s14, 16
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s78, s15, 16
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[8:9], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[10:11], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[12:13], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[72:73], s[14:15], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[22:23], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[26:27], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[30:31], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[34:35], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[36:37], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[40:41], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[8:9], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[12:13], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[74:75], s[14:15], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[24:25], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[36:37], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[26:27], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[38:39], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[40:41], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[42:43], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[44:45], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[46:47], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[48:49], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[50:51], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[54:55], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[56:57], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[58:59], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[48:49], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[50:51], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[52:53], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[54:55], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[56:57], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[58:59], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[60:61], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[62:63], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[66:67], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[68:69], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[70:71], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[74:75], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[64:65], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[66:67], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[68:69], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[72:73], 0x100000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[76:77], 0x100000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[78:79], 0x100000
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s60
-; GCN-NOHSA-VI-NEXT: s_add_u32 s60, s16, 0xf0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[70:71], 0x100000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[78:79], 0x100000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s62
+; GCN-NOHSA-VI-NEXT: s_add_u32 s62, s16, 0xf0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s63
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s63, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s62
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s64
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s63
+; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GCN-NOHSA-VI-NEXT: s_nop 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s60
+; GCN-NOHSA-VI-NEXT: s_add_u32 s60, s16, 0xe0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61
; GCN-NOHSA-VI-NEXT: s_addc_u32 s61, s17, 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s60
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s74
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s75
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s61
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_nop 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s56
+; GCN-NOHSA-VI-NEXT: s_add_u32 s56, s16, 0xd0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s57
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s57, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s56
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s58
-; GCN-NOHSA-VI-NEXT: s_add_u32 s58, s16, 0xe0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s59, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s58
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s72
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s73
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s59
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s57
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_nop 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54
-; GCN-NOHSA-VI-NEXT: s_add_u32 s54, s16, 0xd0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s55, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s54
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s55
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46
+; GCN-NOHSA-VI-NEXT: s_add_u32 s46, s16, 0xc0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s47, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s46
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s54
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s47
+; GCN-NOHSA-VI-NEXT: s_add_u32 s46, s16, 0xb0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: s_nop 0
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s47, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s46
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52
-; GCN-NOHSA-VI-NEXT: s_add_u32 s52, s16, 0xc0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s53, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s52
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s64
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s65
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s53
-; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: s_nop 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48
-; GCN-NOHSA-VI-NEXT: s_add_u32 s48, s16, 0xb0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s49, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s48
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s50
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s49
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s47
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_nop 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38
-; GCN-NOHSA-VI-NEXT: s_add_u32 s38, s16, 0xa0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s39, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s38
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s46
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s39
-; GCN-NOHSA-VI-NEXT: s_add_u32 s38, s16, 0x90
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34
+; GCN-NOHSA-VI-NEXT: s_add_u32 s34, s16, 0xa0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s35, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s34
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s48
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s35
+; GCN-NOHSA-VI-NEXT: s_add_u32 s34, s16, 0x90
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s39, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s38
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s35, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s34
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s42
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s39
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s35
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_nop 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24
-; GCN-NOHSA-VI-NEXT: s_add_u32 s24, s16, 0x80
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s25, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s24
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22
+; GCN-NOHSA-VI-NEXT: s_add_u32 s22, s16, 0x80
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s23, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s22
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s40
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s25
-; GCN-NOHSA-VI-NEXT: s_add_u32 s24, s16, 0x70
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s23
+; GCN-NOHSA-VI-NEXT: s_add_u32 s22, s16, 0x70
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-NOHSA-VI-NEXT: s_addc_u32 s25, s17, 0
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s24
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s25
+; GCN-NOHSA-VI-NEXT: s_addc_u32 s23, s17, 0
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s22
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s36
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s37
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s23
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: s_nop 0
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20
@@ -8848,8 +8900,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s2
; GCN-NOHSA-VI-NEXT: s_add_u32 s2, s16, 48
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25
; GCN-NOHSA-VI-NEXT: s_addc_u32 s3, s17, 0
; GCN-NOHSA-VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s3
@@ -9056,92 +9108,100 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GFX12-LABEL: constant_sextload_v32i16_to_v32i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[16:19], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr22_sgpr23
+; GFX12-NEXT: ; implicit-def: $sgpr24_sgpr25
+; GFX12-NEXT: ; implicit-def: $sgpr28_sgpr29
+; GFX12-NEXT: ; implicit-def: $sgpr38_sgpr39
+; GFX12-NEXT: ; implicit-def: $sgpr66_sgpr67
+; GFX12-NEXT: ; implicit-def: $sgpr48_sgpr49
+; GFX12-NEXT: ; implicit-def: $sgpr46_sgpr47
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s28, s2, 16
-; GFX12-NEXT: s_lshr_b32 s42, s5, 16
-; GFX12-NEXT: s_lshr_b32 s50, s8, 16
-; GFX12-NEXT: s_mov_b32 s60, s11
-; GFX12-NEXT: s_lshr_b32 s22, s0, 16
-; GFX12-NEXT: s_mov_b32 s24, s1
-; GFX12-NEXT: s_lshr_b32 s26, s1, 16
-; GFX12-NEXT: s_mov_b32 s30, s3
-; GFX12-NEXT: s_lshr_b32 s36, s3, 16
-; GFX12-NEXT: s_lshr_b32 s38, s4, 16
-; GFX12-NEXT: s_mov_b32 s40, s5
-; GFX12-NEXT: s_lshr_b32 s44, s6, 16
-; GFX12-NEXT: s_mov_b32 s46, s7
-; GFX12-NEXT: s_lshr_b32 s48, s7, 16
-; GFX12-NEXT: s_mov_b32 s52, s9
-; GFX12-NEXT: s_lshr_b32 s54, s9, 16
-; GFX12-NEXT: s_bfe_i64 s[56:57], s[10:11], 0x100000
-; GFX12-NEXT: s_lshr_b32 s58, s10, 16
-; GFX12-NEXT: s_lshr_b32 s62, s11, 16
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[28:29], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[28:29], s[42:43], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[42:43], s[50:51], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[50:51], s[60:61], 0x100000
-; GFX12-NEXT: s_lshr_b32 s60, s14, 16
-; GFX12-NEXT: s_bfe_i64 s[64:65], s[14:15], 0x100000
-; GFX12-NEXT: s_mov_b32 s14, s15
-; GFX12-NEXT: s_lshr_b32 s66, s15, 16
+; GFX12-NEXT: s_lshr_b32 s30, s0, 16
+; GFX12-NEXT: s_mov_b32 s22, s1
+; GFX12-NEXT: s_mov_b32 s24, s3
+; GFX12-NEXT: s_lshr_b32 s50, s4, 16
+; GFX12-NEXT: s_mov_b32 s28, s5
+; GFX12-NEXT: s_lshr_b32 s52, s5, 16
+; GFX12-NEXT: s_lshr_b32 s60, s9, 16
+; GFX12-NEXT: s_lshr_b32 s62, s10, 16
+; GFX12-NEXT: s_lshr_b32 s42, s3, 16
+; GFX12-NEXT: s_lshr_b32 s58, s8, 16
; GFX12-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000
+; GFX12-NEXT: s_lshr_b32 s34, s1, 16
; GFX12-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[22:23], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[8:9], s[24:25], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[6:7], s[26:27], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[24:25], s[30:31], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[22:23], s[36:37], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[26:27], s[38:39], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[30:31], s[40:41], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[36:37], s[44:45], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[40:41], s[46:47], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[38:39], s[48:49], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[44:45], s[52:53], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[46:47], s[54:55], 0x100000
-; GFX12-NEXT: s_bfe_i64 s[48:49], s[58:59], 0x100000
-; GFX12-NEXT: s_lshr_b32 s52, s12, 16
-; GFX12-NEXT: s_bfe_i64 s[54:55], s[12:13], 0x100000
-; GFX12-NEXT: s_mov_b32 s12, s13
-; GFX12-NEXT: s_lshr_b32 s58, s13, 16
-; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
+; GFX12-NEXT: s_lshr_b32 s40, s2, 16
+; GFX12-NEXT: s_bfe_i64 s[26:27], s[6:7], 0x100000
+; GFX12-NEXT: s_lshr_b32 s54, s6, 16
+; GFX12-NEXT: s_mov_b32 s38, s7
+; GFX12-NEXT: s_lshr_b32 s56, s7, 16
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[30:31], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[6:7], s[22:23], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[22:23], s[24:25], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[24:25], s[50:51], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[30:31], s[28:29], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[28:29], s[52:53], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[50:51], s[60:61], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x100000
+; GFX12-NEXT: s_lshr_b32 s60, s14, 16
+; GFX12-NEXT: s_bfe_i64 s[62:63], s[14:15], 0x100000
+; GFX12-NEXT: s_mov_b32 s66, s15
+; GFX12-NEXT: s_lshr_b32 s14, s15, 16
+; GFX12-NEXT: s_bfe_i64 s[44:45], s[10:11], 0x100000
+; GFX12-NEXT: s_mov_b32 s48, s11
+; GFX12-NEXT: s_lshr_b32 s64, s11, 16
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[42:43], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[42:43], s[58:59], 0x100000
+; GFX12-NEXT: ; implicit-def: $sgpr58_sgpr59
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[4:5], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[36:37], s[8:9], 0x100000
+; GFX12-NEXT: s_mov_b32 s46, s9
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[34:35], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[8:9], s[40:41], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[34:35], s[54:55], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[40:41], s[38:39], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[38:39], s[56:57], 0x100000
+; GFX12-NEXT: s_lshr_b32 s54, s12, 16
+; GFX12-NEXT: s_bfe_i64 s[56:57], s[12:13], 0x100000
+; GFX12-NEXT: s_mov_b32 s58, s13
+; GFX12-NEXT: s_lshr_b32 s12, s13, 16
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000
; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s15
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s67
; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s67
-; GFX12-NEXT: v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s65
-; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000
-; GFX12-NEXT: v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s61
-; GFX12-NEXT: v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s13
-; GFX12-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v11, s59
-; GFX12-NEXT: v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s55
-; GFX12-NEXT: v_dual_mov_b32 v12, s54 :: v_dual_mov_b32 v15, s53
-; GFX12-NEXT: v_mov_b32_e32 v14, s52
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[62:63], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v0, s66 :: v_dual_mov_b32 v3, s15
+; GFX12-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v5, s63
+; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v4, s62 :: v_dual_mov_b32 v7, s61
+; GFX12-NEXT: v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s59
+; GFX12-NEXT: v_dual_mov_b32 v8, s58 :: v_dual_mov_b32 v11, s13
+; GFX12-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v13, s57
+; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000
+; GFX12-NEXT: v_dual_mov_b32 v12, s56 :: v_dual_mov_b32 v15, s55
+; GFX12-NEXT: v_mov_b32_e32 v14, s54
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[64:65], 0x100000
+; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:240
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:224
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:208
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:192
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_dual_mov_b32 v0, s50 :: v_dual_mov_b32 v3, s13
-; GFX12-NEXT: v_dual_mov_b32 v1, s51 :: v_dual_mov_b32 v2, s12
-; GFX12-NEXT: v_dual_mov_b32 v5, s57 :: v_dual_mov_b32 v4, s56
-; GFX12-NEXT: v_dual_mov_b32 v7, s49 :: v_dual_mov_b32 v6, s48
-; GFX12-NEXT: v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44
-; GFX12-NEXT: v_dual_mov_b32 v11, s47 :: v_dual_mov_b32 v10, s46
-; GFX12-NEXT: v_dual_mov_b32 v13, s35 :: v_dual_mov_b32 v12, s34
+; GFX12-NEXT: v_dual_mov_b32 v0, s48 :: v_dual_mov_b32 v3, s13
+; GFX12-NEXT: v_dual_mov_b32 v1, s49 :: v_dual_mov_b32 v2, s12
+; GFX12-NEXT: v_dual_mov_b32 v5, s45 :: v_dual_mov_b32 v4, s44
+; GFX12-NEXT: v_dual_mov_b32 v7, s53 :: v_dual_mov_b32 v6, s52
+; GFX12-NEXT: v_dual_mov_b32 v9, s47 :: v_dual_mov_b32 v8, s46
+; GFX12-NEXT: v_dual_mov_b32 v11, s51 :: v_dual_mov_b32 v10, s50
+; GFX12-NEXT: v_dual_mov_b32 v13, s37 :: v_dual_mov_b32 v12, s36
; GFX12-NEXT: v_dual_mov_b32 v15, s43 :: v_dual_mov_b32 v14, s42
; GFX12-NEXT: v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v16, s40
; GFX12-NEXT: v_dual_mov_b32 v19, s39 :: v_dual_mov_b32 v18, s38
-; GFX12-NEXT: v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20
-; GFX12-NEXT: v_dual_mov_b32 v23, s37 :: v_dual_mov_b32 v22, s36
+; GFX12-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v20, s26
+; GFX12-NEXT: v_dual_mov_b32 v23, s35 :: v_dual_mov_b32 v22, s34
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:160
@@ -9151,16 +9211,16 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[16:17] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v3, s29
; GFX12-NEXT: v_dual_mov_b32 v1, s31 :: v_dual_mov_b32 v2, s28
-; GFX12-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
-; GFX12-NEXT: v_dual_mov_b32 v7, s27 :: v_dual_mov_b32 v6, s26
-; GFX12-NEXT: v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24
-; GFX12-NEXT: v_dual_mov_b32 v11, s23 :: v_dual_mov_b32 v10, s22
+; GFX12-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v4, s20
+; GFX12-NEXT: v_dual_mov_b32 v7, s25 :: v_dual_mov_b32 v6, s24
+; GFX12-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22
+; GFX12-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
; GFX12-NEXT: v_dual_mov_b32 v13, s1 :: v_dual_mov_b32 v12, s0
-; GFX12-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s10
-; GFX12-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s8
-; GFX12-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
+; GFX12-NEXT: v_dual_mov_b32 v15, s9 :: v_dual_mov_b32 v14, s8
+; GFX12-NEXT: v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
+; GFX12-NEXT: v_dual_mov_b32 v19, s5 :: v_dual_mov_b32 v18, s4
; GFX12-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
-; GFX12-NEXT: v_dual_mov_b32 v23, s5 :: v_dual_mov_b32 v22, s4
+; GFX12-NEXT: v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s2
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:80
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:64
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b534c2c267fad..c5213336f818c 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -6396,39 +6396,40 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX6-NOHSA-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT: s_lshr_b32 s6, s5, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s4, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s4, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s4, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s5, 8
-; GFX6-NOHSA-NEXT: s_mov_b32 s16, s5
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
+; GFX6-NOHSA-NEXT: s_lshr_b32 s8, s5, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s4, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s4, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s4, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s5, 8
+; GFX6-NOHSA-NEXT: s_mov_b32 s6, s5
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[4:5], 0x80000
-; GFX6-NOHSA-NEXT: s_ashr_i32 s15, s5, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s17, s5, 31
; GFX6-NOHSA-NEXT: s_ashr_i32 s20, s5, 24
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[14:15], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[16:17], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s20
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s15
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s18
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s19
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s16
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s17
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s7
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s6
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s7
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s8
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s9
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s10
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s11
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s10
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s11
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s12
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s13
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s12
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s13
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s14
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s15
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s4
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s5
@@ -6439,26 +6440,27 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX7-HSA: ; %bb.0:
; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT: s_add_i32 s12, s12, s17
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX7-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX7-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-HSA-NEXT: s_lshr_b32 s4, s3, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s6, s2, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s8, s2, 24
-; GFX7-HSA-NEXT: s_lshr_b32 s10, s2, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s12, s3, 8
-; GFX7-HSA-NEXT: s_mov_b32 s14, s3
-; GFX7-HSA-NEXT: s_ashr_i32 s5, s3, 31
+; GFX7-HSA-NEXT: s_lshr_b32 s6, s3, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s8, s2, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s10, s2, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s12, s2, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s14, s3, 8
+; GFX7-HSA-NEXT: s_mov_b32 s4, s3
+; GFX7-HSA-NEXT: s_ashr_i32 s7, s3, 31
; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GFX7-HSA-NEXT: s_ashr_i32 s18, s3, 24
-; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[6:7], 0x80000
; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT: s_add_u32 s2, s0, 48
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -6467,14 +6469,14 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2
; GFX7-HSA-NEXT: s_add_u32 s2, s0, 16
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s18
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s5
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s7
; GFX7-HSA-NEXT: s_addc_u32 s3, s1, 0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s6
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s7
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s8
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s9
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s10
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s11
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s2
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1
@@ -6482,15 +6484,15 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX7-HSA-NEXT: s_add_u32 s0, s0, 32
; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s10
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s11
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s13
; GFX7-HSA-NEXT: s_addc_u32 s1, s1, 0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s14
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s15
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s13
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s14
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s15
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: s_endpgm
@@ -6498,24 +6500,25 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64:
; GFX8-NOHSA: ; %bb.0:
; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s3, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s2, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s2, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s2, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s3, 8
-; GFX8-NOHSA-NEXT: s_mov_b32 s14, s3
-; GFX8-NOHSA-NEXT: s_ashr_i32 s5, s3, 31
+; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s3, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s2, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s10, s2, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s2, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s3, 8
+; GFX8-NOHSA-NEXT: s_mov_b32 s4, s3
+; GFX8-NOHSA-NEXT: s_ashr_i32 s7, s3, 31
; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GFX8-NOHSA-NEXT: s_ashr_i32 s18, s3, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[6:7], 0x80000
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 48
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3
@@ -6524,14 +6527,14 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NOHSA-NEXT: s_add_u32 s2, s0, 16
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s18
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s5
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s7
; GFX8-NOHSA-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s9
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s11
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1
@@ -6539,15 +6542,15 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX8-NOHSA-NEXT: s_add_u32 s0, s0, 32
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s16
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s17
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s10
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s11
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13
; GFX8-NOHSA-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s15
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_endpgm
@@ -6611,33 +6614,34 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
; GFX12-LABEL: constant_sextload_v8i8_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s4, s3, 16
-; GFX12-NEXT: s_lshr_b32 s6, s2, 16
-; GFX12-NEXT: s_lshr_b32 s8, s2, 24
-; GFX12-NEXT: s_lshr_b32 s10, s2, 8
+; GFX12-NEXT: s_lshr_b32 s6, s3, 16
+; GFX12-NEXT: s_lshr_b32 s8, s2, 16
+; GFX12-NEXT: s_lshr_b32 s10, s2, 24
+; GFX12-NEXT: s_lshr_b32 s12, s2, 8
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
-; GFX12-NEXT: s_lshr_b32 s12, s3, 8
-; GFX12-NEXT: s_mov_b32 s14, s3
+; GFX12-NEXT: s_lshr_b32 s14, s3, 8
+; GFX12-NEXT: s_mov_b32 s4, s3
; GFX12-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x80000
; GFX12-NEXT: s_ashr_i32 s15, s3, 31
; GFX12-NEXT: s_ashr_i32 s18, s3, 24
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v3, s15
-; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v9, s7
-; GFX12-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v11, s9
-; GFX12-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v7, s11
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[14:15], 0x80000
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v3, s15
+; GFX12-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v9, s9
+; GFX12-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v11, s11
+; GFX12-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v7, s13
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[14:15], 0x80000
; GFX12-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v5, s17
-; GFX12-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s5
-; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v13, s3
-; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v15, s13
-; GFX12-NEXT: v_mov_b32_e32 v14, s12
+; GFX12-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s7
+; GFX12-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v13, s3
+; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v15, s5
+; GFX12-NEXT: v_mov_b32_e32 v14, s4
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1]
@@ -7032,71 +7036,73 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr10_sgpr11
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s6, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s6, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s6, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s4, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s4, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s4, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s7, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s7, 8
-; GFX6-NOHSA-NEXT: s_mov_b32 s26, s7
+; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s6, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s6, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s6, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s4, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s4, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s4, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s7, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s26, s7, 8
+; GFX6-NOHSA-NEXT: s_mov_b32 s10, s7
; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s5, 16
; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s5, 8
; GFX6-NOHSA-NEXT: s_mov_b32 s8, s5
; GFX6-NOHSA-NEXT: s_bfe_i64 s[34:35], s[4:5], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[36:37], s[6:7], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GFX6-NOHSA-NEXT: s_ashr_i32 s29, s5, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s31, s5, 24
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX6-NOHSA-NEXT: s_ashr_i32 s33, s7, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s38, s7, 24
+; GFX6-NOHSA-NEXT: s_ashr_i32 s27, s5, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s29, s5, 24
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[38:39], s[10:11], 0x80000
+; GFX6-NOHSA-NEXT: s_ashr_i32 s31, s7, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s33, s7, 24
; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s37
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s34
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s35
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s38
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s33
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s26
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s27
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s12
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s13
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s14
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s15
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s33
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s31
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s38
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s39
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s14
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s15
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s16
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s17
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s31
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s29
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s16
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s17
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s29
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s27
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s18
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s19
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s18
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s19
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s20
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s21
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s20
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s21
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s22
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s23
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s8
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s9
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s22
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s23
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
-; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s24
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s25
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s12
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s13
; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s10
; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s11
@@ -7115,44 +7121,46 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT: s_add_i32 s12, s12, s17
; GFX7-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX7-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-HSA-NEXT: s_lshr_b32 s2, s6, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s8, s6, 24
-; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GFX7-HSA-NEXT: s_lshr_b32 s10, s6, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s12, s4, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s14, s4, 24
-; GFX7-HSA-NEXT: s_lshr_b32 s16, s4, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s18, s7, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s20, s7, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s10, s6, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s12, s6, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s16, s4, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s18, s4, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s20, s4, 8
+; GFX7-HSA-NEXT: s_mov_b32 s2, s5
; GFX7-HSA-NEXT: s_ashr_i32 s27, s5, 31
; GFX7-HSA-NEXT: s_ashr_i32 s29, s5, 24
-; GFX7-HSA-NEXT: s_mov_b32 s22, s7
-; GFX7-HSA-NEXT: s_lshr_b32 s24, s5, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s26, s5, 8
-; GFX7-HSA-NEXT: s_mov_b32 s28, s5
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s2
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s3
-; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[8:9], 0x80000
+; GFX7-HSA-NEXT: s_lshr_b32 s26, s5, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s28, s5, 8
+; GFX7-HSA-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[10:11], 0x80000
+; GFX7-HSA-NEXT: s_lshr_b32 s14, s6, 8
+; GFX7-HSA-NEXT: s_mov_b32 s8, s7
+; GFX7-HSA-NEXT: s_lshr_b32 s22, s7, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s24, s7, 8
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[12:13], 0x80000
; GFX7-HSA-NEXT: s_ashr_i32 s33, s7, 31
; GFX7-HSA-NEXT: s_ashr_i32 s34, s7, 24
-; GFX7-HSA-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s2
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s3
-; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[26:27], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[24:25], 0x80000
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s4
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s5
+; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[28:29], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[24:25], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX7-HSA-NEXT: s_add_u32 s24, s0, 0x50
; GFX7-HSA-NEXT: s_addc_u32 s25, s1, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6
@@ -7164,30 +7172,30 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s25
; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s6
; GFX7-HSA-NEXT: s_add_u32 s6, s0, 16
-; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s10
-; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s11
+; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s14
+; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s15
; GFX7-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; GFX7-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s12
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s13
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s14
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s15
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s17
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s18
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s19
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s1
; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s30
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s31
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s16
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s17
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s20
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s21
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s0
; GFX7-HSA-NEXT: s_add_u32 s6, s0, 0x70
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s18
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s19
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s22
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s34
; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s33
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7
@@ -7197,16 +7205,16 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7
; GFX7-HSA-NEXT: s_add_u32 s6, s0, 48
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s22
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s23
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s20
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s21
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s12
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s13
; GFX7-HSA-NEXT: s_addc_u32 s7, s1, 0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s6
; GFX7-HSA-NEXT: s_add_u32 s0, s0, 32
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s10
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s11
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s29
; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s27
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s7
@@ -7224,110 +7232,111 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64:
; GFX8-NOHSA: ; %bb.0:
; GFX8-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr10_sgpr11
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
+; GFX8-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s10, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s10, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s10, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s8, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s8, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s8, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s11, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s26, s11, 8
-; GFX8-NOHSA-NEXT: s_mov_b32 s28, s11
-; GFX8-NOHSA-NEXT: s_lshr_b32 s6, s9, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s4, s9, 8
-; GFX8-NOHSA-NEXT: s_mov_b32 s2, s9
-; GFX8-NOHSA-NEXT: s_ashr_i32 s25, s9, 31
-; GFX8-NOHSA-NEXT: s_ashr_i32 s29, s9, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[30:31], s[8:9], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x80000
+; GFX8-NOHSA-NEXT: s_lshr_b32 s14, s6, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s6, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s18, s6, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s4, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s22, s4, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s4, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s26, s7, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s7, 8
+; GFX8-NOHSA-NEXT: s_mov_b32 s10, s7
+; GFX8-NOHSA-NEXT: s_lshr_b32 s28, s5, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s8, s5, 8
+; GFX8-NOHSA-NEXT: s_mov_b32 s2, s5
+; GFX8-NOHSA-NEXT: s_ashr_i32 s27, s5, 31
+; GFX8-NOHSA-NEXT: s_ashr_i32 s29, s5, 24
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[28:29], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX8-NOHSA-NEXT: s_ashr_i32 s28, s11, 31
-; GFX8-NOHSA-NEXT: s_ashr_i32 s33, s11, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[24:25], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
+; GFX8-NOHSA-NEXT: s_ashr_i32 s28, s7, 31
+; GFX8-NOHSA-NEXT: s_ashr_i32 s33, s7, 24
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[26:27], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s12
-; GFX8-NOHSA-NEXT: s_add_u32 s12, s0, 0x50
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s13
-; GFX8-NOHSA-NEXT: s_addc_u32 s13, s1, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s12
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s14
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s15
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s13
-; GFX8-NOHSA-NEXT: s_add_u32 s12, s0, 64
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s14
+; GFX8-NOHSA-NEXT: s_add_u32 s14, s0, 0x50
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s15
+; GFX8-NOHSA-NEXT: s_addc_u32 s15, s1, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s14
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s16
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s17
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s15
+; GFX8-NOHSA-NEXT: s_add_u32 s14, s0, 64
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_addc_u32 s13, s1, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s12
+; GFX8-NOHSA-NEXT: s_addc_u32 s15, s1, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s14
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s34
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s35
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s16
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s17
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s13
-; GFX8-NOHSA-NEXT: s_add_u32 s12, s0, 16
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s18
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s19
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s15
+; GFX8-NOHSA-NEXT: s_add_u32 s14, s0, 16
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_addc_u32 s13, s1, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s12
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s18
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s19
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s20
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s21
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s13
+; GFX8-NOHSA-NEXT: s_addc_u32 s15, s1, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s14
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s20
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s21
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s23
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s15
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s30
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s31
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s22
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s23
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s24
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s25
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_nop 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10
-; GFX8-NOHSA-NEXT: s_add_u32 s10, s0, 0x70
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11
-; GFX8-NOHSA-NEXT: s_addc_u32 s11, s1, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s10
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x70
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7
+; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s33
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s28
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s11
-; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_nop 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NOHSA-NEXT: s_add_u32 s8, s0, 0x60
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s9
-; GFX8-NOHSA-NEXT: s_addc_u32 s9, s1, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s8
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s26
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s9
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7
+; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 0x60
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_nop 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s6
-; GFX8-NOHSA-NEXT: s_add_u32 s6, s0, 48
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7
; GFX8-NOHSA-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s10
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s11
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s12
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s13
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7
+; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GFX8-NOHSA-NEXT: s_nop 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NOHSA-NEXT: s_add_u32 s4, s0, 48
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NOHSA-NEXT: s_addc_u32 s5, s1, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NOHSA-NEXT: s_add_u32 s0, s0, 32
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s29
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s25
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NOHSA-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s3
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s5
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s8
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s9
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_endpgm
@@ -7434,57 +7443,59 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
; GFX12-LABEL: constant_sextload_v16i8_to_v16i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX12-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s2, s6, 16
-; GFX12-NEXT: s_lshr_b32 s8, s6, 24
-; GFX12-NEXT: s_lshr_b32 s10, s6, 8
+; GFX12-NEXT: s_lshr_b32 s10, s6, 16
+; GFX12-NEXT: s_lshr_b32 s12, s6, 24
+; GFX12-NEXT: s_lshr_b32 s14, s6, 8
; GFX12-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GFX12-NEXT: s_lshr_b32 s12, s4, 16
-; GFX12-NEXT: s_lshr_b32 s14, s4, 24
-; GFX12-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
-; GFX12-NEXT: s_lshr_b32 s16, s4, 8
-; GFX12-NEXT: v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v9, s3
-; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s9
-; GFX12-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v3, s11
-; GFX12-NEXT: s_lshr_b32 s18, s7, 16
+; GFX12-NEXT: s_lshr_b32 s16, s4, 16
+; GFX12-NEXT: s_lshr_b32 s18, s4, 24
+; GFX12-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v5, s31
-; GFX12-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v13, s13
-; GFX12-NEXT: s_lshr_b32 s20, s7, 8
-; GFX12-NEXT: s_mov_b32 s22, s7
+; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
+; GFX12-NEXT: s_lshr_b32 s20, s4, 8
+; GFX12-NEXT: v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v9, s11
+; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s13
+; GFX12-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v3, s15
+; GFX12-NEXT: s_lshr_b32 s22, s7, 16
+; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
-; GFX12-NEXT: s_lshr_b32 s24, s5, 16
+; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v5, s31
+; GFX12-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v13, s17
+; GFX12-NEXT: s_lshr_b32 s24, s7, 8
+; GFX12-NEXT: s_mov_b32 s2, s7
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
+; GFX12-NEXT: s_lshr_b32 s26, s5, 16
+; GFX12-NEXT: s_mov_b32 s8, s5
+; GFX12-NEXT: s_ashr_i32 s27, s5, 31
; GFX12-NEXT: s_ashr_i32 s33, s7, 31
; GFX12-NEXT: s_ashr_i32 s36, s7, 24
-; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s15
-; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v7, s17
-; GFX12-NEXT: s_lshr_b32 s26, s5, 8
-; GFX12-NEXT: s_mov_b32 s28, s5
-; GFX12-NEXT: s_ashr_i32 s27, s5, 31
-; GFX12-NEXT: s_ashr_i32 s29, s5, 24
; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX12-NEXT: v_mov_b32_e32 v6, s16
+; GFX12-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v15, s19
+; GFX12-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v7, s21
+; GFX12-NEXT: s_lshr_b32 s28, s5, 8
+; GFX12-NEXT: s_ashr_i32 s29, s5, 24
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GFX12-NEXT: v_mov_b32_e32 v6, s20
+; GFX12-NEXT: s_bfe_i64 s[4:5], s[8:9], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[8:9], s[26:27], 0x80000
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
-; GFX12-NEXT: v_dual_mov_b32 v0, s18 :: v_dual_mov_b32 v3, s33
-; GFX12-NEXT: v_dual_mov_b32 v1, s19 :: v_dual_mov_b32 v2, s36
-; GFX12-NEXT: v_mov_b32_e32 v9, s23
-; GFX12-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[6:7], s[26:27], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s21
-; GFX12-NEXT: v_dual_mov_b32 v10, s20 :: v_dual_mov_b32 v17, s25
-; GFX12-NEXT: v_dual_mov_b32 v16, s24 :: v_dual_mov_b32 v19, s27
+; GFX12-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v3, s33
+; GFX12-NEXT: v_dual_mov_b32 v1, s23 :: v_dual_mov_b32 v2, s36
+; GFX12-NEXT: v_mov_b32_e32 v9, s3
+; GFX12-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s25
+; GFX12-NEXT: v_dual_mov_b32 v10, s24 :: v_dual_mov_b32 v17, s9
+; GFX12-NEXT: v_dual_mov_b32 v16, s8 :: v_dual_mov_b32 v19, s27
; GFX12-NEXT: v_dual_mov_b32 v18, s29 :: v_dual_mov_b32 v21, s5
; GFX12-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v23, s7
; GFX12-NEXT: v_mov_b32_e32 v22, s6
@@ -8205,304 +8216,314 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9
; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr26_sgpr27
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr12_sgpr13
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr14_sgpr15
+; GFX6-NOHSA-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX6-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s6, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s6, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s6, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s26, s4, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s4, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s4, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s2, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s12, s2, 24
-; GFX6-NOHSA-NEXT: s_lshr_b32 s14, s2, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s0, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s18, s0, 24
-; GFX6-NOHSA-NEXT: s_mov_b32 s34, s7
-; GFX6-NOHSA-NEXT: s_ashr_i32 s11, s1, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s13, s1, 24
-; GFX6-NOHSA-NEXT: s_ashr_i32 s15, s3, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s17, s3, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s6, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s10, s6, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s16, s6, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s22, s4, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s20, s4, 24
+; GFX6-NOHSA-NEXT: s_mov_b32 s26, s7
+; GFX6-NOHSA-NEXT: s_mov_b32 s12, s5
+; GFX6-NOHSA-NEXT: s_mov_b32 s14, s3
+; GFX6-NOHSA-NEXT: s_mov_b32 s18, s1
+; GFX6-NOHSA-NEXT: s_ashr_i32 s23, s1, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s25, s1, 24
+; GFX6-NOHSA-NEXT: s_ashr_i32 s29, s3, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s31, s3, 24
; GFX6-NOHSA-NEXT: s_ashr_i32 s33, s5, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s49, s5, 24
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[38:39], s[34:35], 0x80000
-; GFX6-NOHSA-NEXT: s_ashr_i32 s19, s7, 31
-; GFX6-NOHSA-NEXT: s_ashr_i32 s21, s7, 24
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[52:53], s[30:31], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[54:55], s[28:29], 0x80000
+; GFX6-NOHSA-NEXT: s_ashr_i32 s35, s5, 24
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GFX6-NOHSA-NEXT: s_ashr_i32 s11, s7, 31
+; GFX6-NOHSA-NEXT: s_ashr_i32 s17, s7, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s38, s4, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s48, s2, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s36, s2, 24
+; GFX6-NOHSA-NEXT: s_lshr_b32 s46, s2, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s42, s0, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s40, s0, 24
; GFX6-NOHSA-NEXT: s_lshr_b32 s28, s0, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s7, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s34, s7, 8
-; GFX6-NOHSA-NEXT: s_lshr_b32 s36, s5, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s40, s5, 8
-; GFX6-NOHSA-NEXT: s_mov_b32 s46, s5
-; GFX6-NOHSA-NEXT: s_lshr_b32 s42, s3, 16
-; GFX6-NOHSA-NEXT: s_lshr_b32 s44, s3, 8
-; GFX6-NOHSA-NEXT: s_mov_b32 s50, s3
-; GFX6-NOHSA-NEXT: s_lshr_b32 s48, s1, 16
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[56:57], s[0:1], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[56:57], s[24:25], 0x80000
+; GFX6-NOHSA-NEXT: s_lshr_b32 s24, s7, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s30, s7, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s34, s5, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s44, s5, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s50, s3, 16
+; GFX6-NOHSA-NEXT: s_lshr_b32 s52, s3, 8
+; GFX6-NOHSA-NEXT: s_lshr_b32 s54, s1, 16
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[58:59], s[0:1], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[58:59], s[4:5], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[60:61], s[6:7], 0x80000
-; GFX6-NOHSA-NEXT: s_lshr_b32 s4, s1, 8
-; GFX6-NOHSA-NEXT: s_mov_b32 s6, s1
-; GFX6-NOHSA-NEXT: s_mov_b32 s0, s8
-; GFX6-NOHSA-NEXT: s_mov_b32 s1, s9
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s60
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s61
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s58
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s59
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s2
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s3
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s56
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s57
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s38
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s39
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s54
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s55
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v20, s52
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v21, s53
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v22, s21
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v23, s19
-; GFX6-NOHSA-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[26:27], 0x80000
-; GFX6-NOHSA-NEXT: s_mov_b32 s2, -1
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v24, s8
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:208
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s20
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s21
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v25, s9
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v26, s22
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v27, s23
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s24
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s25
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:144
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[6:7], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[22:23], s[50:51], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[24:25], s[46:47], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[48:49], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[8:9], s[44:45], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[42:43], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[38:39], s[40:41], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
+; GFX6-NOHSA-NEXT: s_lshr_b32 s0, s1, 8
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s6
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s7
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s4
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s5
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s2
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s3
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s58
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s59
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s26
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s27
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s17
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s11
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s56
+; GFX6-NOHSA-NEXT: s_mov_b32 s11, 0xf000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[2:3], s[10:11], 0x80000
+; GFX6-NOHSA-NEXT: s_mov_b32 s10, -1
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s57
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s2
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s3
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[14:17], off, s[8:11], 0 offset:208
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[4:5], s[18:19], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[14:15], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[26:27], s[12:13], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[56:57], s[22:23], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[58:59], s[16:17], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[60:61], s[20:21], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
; GFX6-NOHSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[40:41], s[16:17], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[42:43], s[14:15], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[44:45], s[12:13], 0x80000
-; GFX6-NOHSA-NEXT: s_bfe_i64 s[46:47], s[10:11], 0x80000
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:128
-; GFX6-NOHSA-NEXT: s_waitcnt expcnt(2)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s46
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s47
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s44
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s45
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[2:3], s[54:55], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[6:7], s[52:53], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[12:13], s[50:51], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[16:17], s[34:35], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[20:21], s[30:31], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[44:45], s[28:29], 0x80000
+; GFX6-NOHSA-NEXT: s_bfe_i64 s[50:51], s[24:25], 0x80000
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s58
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s59
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:192
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s49
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s33
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s42
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s43
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s40
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s41
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s18
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s19
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s56
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s57
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s60
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s61
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:144
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s35
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s33
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s24
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s25
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s28
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s29
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s17
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s15
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v20, s30
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v21, s31
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:240
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s22
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s23
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s34
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s35
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:224
-; GFX6-NOHSA-NEXT: s_waitcnt expcnt(2)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v14, s13
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v15, s11
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s36
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s37
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s38
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s39
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[8:11], 0 offset:128
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s20
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s21
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s38
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s39
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:160
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s48
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s49
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s36
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s37
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[8:11], 0 offset:80
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s26
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s27
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s8
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s9
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v8, s26
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v9, s27
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s46
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s47
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:64
; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s6
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s7
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s5
-; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s42
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s43
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s40
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s41
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v6, s31
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v7, s29
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s44
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s45
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[2:5], off, s[8:11], 0
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v16, s18
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v17, s19
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s50
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s51
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[10:13], off, s[8:11], 0 offset:240
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v20, s25
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v21, s23
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(1)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s20
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s21
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v12, s16
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v13, s17
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[8:11], 0 offset:176
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v10, s14
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v11, s15
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:160
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v4, s12
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v5, s13
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:112
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s6
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s7
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[8:11], 0 offset:96
+; GFX6-NOHSA-NEXT: s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v18, s2
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v19, s3
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[18:21], off, s[8:11], 0 offset:48
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NOHSA-NEXT: v_mov_b32_e32 v3, s1
+; GFX6-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
; GFX6-NOHSA-NEXT: s_endpgm
;
; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i64:
; GFX7-HSA: ; %bb.0:
; GFX7-HSA-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0
; GFX7-HSA-NEXT: s_add_i32 s12, s12, s17
-; GFX7-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GFX7-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GFX7-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr12_sgpr13
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr22_sgpr23
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr14_sgpr15
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
+; GFX7-HSA-NEXT: ; implicit-def: $sgpr10_sgpr11
; GFX7-HSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-HSA-NEXT: s_lshr_b32 s12, s6, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s10, s6, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s16, s6, 16
+; GFX7-HSA-NEXT: s_mov_b32 s12, s1
; GFX7-HSA-NEXT: s_ashr_i32 s33, s1, 31
-; GFX7-HSA-NEXT: s_ashr_i32 s37, s1, 24
-; GFX7-HSA-NEXT: s_lshr_b32 s34, s0, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s30, s0, 24
+; GFX7-HSA-NEXT: s_ashr_i32 s42, s1, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s20, s6, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s36, s0, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s34, s0, 24
; GFX7-HSA-NEXT: s_lshr_b32 s28, s0, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s64, s1, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s66, s1, 8
-; GFX7-HSA-NEXT: s_mov_b32 s68, s1
-; GFX7-HSA-NEXT: s_bfe_i64 s[22:23], s[0:1], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[12:13], 0x80000
-; GFX7-HSA-NEXT: s_lshr_b32 s36, s6, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s40, s4, 16
-; GFX7-HSA-NEXT: s_ashr_i32 s41, s3, 31
-; GFX7-HSA-NEXT: s_lshr_b32 s50, s4, 24
-; GFX7-HSA-NEXT: s_lshr_b32 s52, s4, 8
-; GFX7-HSA-NEXT: s_lshr_b32 s54, s2, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s56, s2, 24
-; GFX7-HSA-NEXT: s_lshr_b32 s42, s2, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s70, s1, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s72, s1, 8
+; GFX7-HSA-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[16:17], 0x80000
+; GFX7-HSA-NEXT: s_mov_b32 s22, s7
+; GFX7-HSA-NEXT: s_mov_b32 s14, s5
+; GFX7-HSA-NEXT: s_mov_b32 s10, s3
+; GFX7-HSA-NEXT: s_lshr_b32 s50, s6, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s52, s4, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s54, s4, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s56, s4, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s58, s2, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s60, s2, 24
+; GFX7-HSA-NEXT: s_lshr_b32 s40, s2, 8
; GFX7-HSA-NEXT: s_lshr_b32 s26, s7, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s20, s7, 8
-; GFX7-HSA-NEXT: s_mov_b32 s24, s7
-; GFX7-HSA-NEXT: s_lshr_b32 s18, s5, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s14, s5, 8
-; GFX7-HSA-NEXT: s_mov_b32 s16, s5
-; GFX7-HSA-NEXT: s_lshr_b32 s58, s3, 16
-; GFX7-HSA-NEXT: s_lshr_b32 s60, s3, 8
-; GFX7-HSA-NEXT: s_mov_b32 s62, s3
+; GFX7-HSA-NEXT: s_lshr_b32 s62, s7, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s24, s5, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s64, s5, 8
+; GFX7-HSA-NEXT: s_lshr_b32 s66, s3, 16
+; GFX7-HSA-NEXT: s_lshr_b32 s68, s3, 8
; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[10:11], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[20:21], 0x80000
+; GFX7-HSA-NEXT: s_ashr_i32 s43, s3, 31
; GFX7-HSA-NEXT: s_ashr_i32 s44, s3, 24
; GFX7-HSA-NEXT: s_ashr_i32 s45, s5, 31
; GFX7-HSA-NEXT: s_ashr_i32 s46, s5, 24
; GFX7-HSA-NEXT: s_ashr_i32 s47, s7, 31
; GFX7-HSA-NEXT: s_ashr_i32 s48, s7, 24
-; GFX7-HSA-NEXT: s_bfe_i64 s[38:39], s[2:3], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[70:71], s[4:5], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[72:73], s[6:7], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[38:39], s[4:5], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[74:75], s[6:7], 0x80000
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s0
; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s1
-; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[66:67], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[64:65], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[60:61], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[58:59], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[2:3], s[12:13], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[0:1], s[72:73], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[4:5], s[70:71], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[6:7], s[68:69], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[12:13], s[66:67], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[16:17], s[14:15], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[14:15], s[64:65], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[20:21], s[24:25], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[24:25], s[22:23], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[22:23], s[62:63], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000
+; GFX7-HSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
; GFX7-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[58:59], s[40:41], 0x80000
-; GFX7-HSA-NEXT: s_bfe_i64 s[60:61], s[36:37], 0x80000
; GFX7-HSA-NEXT: s_add_u32 s62, s8, 0xd0
; GFX7-HSA-NEXT: s_addc_u32 s63, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s60
-; GFX7-HSA-NEXT: s_add_u32 s60, s8, 0xc0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s61
-; GFX7-HSA-NEXT: s_addc_u32 s61, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s50
-; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0x90
-; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s51
+; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s50
+; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0xc0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s51
; GFX7-HSA-NEXT: s_addc_u32 s51, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s50
-; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s51
-; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0x80
+; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s50
+; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s51
+; GFX7-HSA-NEXT: s_add_u32 s50, s8, 0x90
; GFX7-HSA-NEXT: s_addc_u32 s51, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s38
+; GFX7-HSA-NEXT: s_add_u32 s38, s8, 0x80
+; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s39
+; GFX7-HSA-NEXT: s_addc_u32 s39, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s30
+; GFX7-HSA-NEXT: s_add_u32 s30, s8, 0x50
+; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s31
+; GFX7-HSA-NEXT: s_addc_u32 s31, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v30, s30
; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s62
-; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s38
-; GFX7-HSA-NEXT: s_add_u32 s38, s8, 0x50
+; GFX7-HSA-NEXT: v_mov_b32_e32 v31, s31
+; GFX7-HSA-NEXT: s_add_u32 s30, s8, 64
; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s63
-; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s39
-; GFX7-HSA-NEXT: s_addc_u32 s39, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s60
+; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s74
+; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s75
+; GFX7-HSA-NEXT: v_mov_b32_e32 v26, s50
+; GFX7-HSA-NEXT: s_addc_u32 s31, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s52
+; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s53
+; GFX7-HSA-NEXT: v_mov_b32_e32 v10, s54
+; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s55
+; GFX7-HSA-NEXT: v_mov_b32_e32 v27, s51
+; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s58
+; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s59
+; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s60
+; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s61
; GFX7-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3]
-; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s72
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s34
-; GFX7-HSA-NEXT: s_add_u32 s34, s8, 64
-; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s73
-; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s61
-; GFX7-HSA-NEXT: v_mov_b32_e32 v30, s38
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s35
-; GFX7-HSA-NEXT: s_addc_u32 s35, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s58
-; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s59
-; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s54
-; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s55
-; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s56
-; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s57
-; GFX7-HSA-NEXT: v_mov_b32_e32 v31, s39
; GFX7-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
; GFX7-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
-; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s22
-; GFX7-HSA-NEXT: s_add_u32 s22, s8, 16
-; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s23
-; GFX7-HSA-NEXT: s_addc_u32 s23, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s18
+; GFX7-HSA-NEXT: s_add_u32 s18, s8, 16
+; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s19
+; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
-; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s34
-; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s22
-; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s23
-; GFX7-HSA-NEXT: s_add_u32 s22, s8, 0xf0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s30
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s31
-; GFX7-HSA-NEXT: s_addc_u32 s23, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s22
-; GFX7-HSA-NEXT: v_mov_b32_e32 v28, s50
-; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s42
-; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s43
-; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s35
-; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s23
-; GFX7-HSA-NEXT: s_add_u32 s22, s8, 0xe0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v24, s30
+; GFX7-HSA-NEXT: v_mov_b32_e32 v16, s18
+; GFX7-HSA-NEXT: v_mov_b32_e32 v17, s19
+; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0xf0
+; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v18, s18
+; GFX7-HSA-NEXT: v_mov_b32_e32 v19, s19
+; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0xe0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v22, s40
+; GFX7-HSA-NEXT: v_mov_b32_e32 v23, s41
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s36
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s37
+; GFX7-HSA-NEXT: v_mov_b32_e32 v25, s31
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s34
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s35
+; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0
+; GFX7-HSA-NEXT: v_mov_b32_e32 v28, s38
+; GFX7-HSA-NEXT: flat_store_dwordx4 v[24:25], v[20:23]
; GFX7-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
-; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s70
+; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s19
; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s8
-; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s71
-; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s52
-; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s53
-; GFX7-HSA-NEXT: v_mov_b32_e32 v29, s51
+; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s56
+; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s57
+; GFX7-HSA-NEXT: v_mov_b32_e32 v29, s39
; GFX7-HSA-NEXT: v_mov_b32_e32 v6, s28
; GFX7-HSA-NEXT: v_mov_b32_e32 v7, s29
-; GFX7-HSA-NEXT: flat_store_dwordx4 v[24:25], v[20:23]
-; GFX7-HSA-NEXT: s_addc_u32 s23, s9, 0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s22
+; GFX7-HSA-NEXT: v_mov_b32_e32 v20, s18
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s9
+; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0xb0
; GFX7-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15]
; GFX7-HSA-NEXT: v_mov_b32_e32 v8, s26
; GFX7-HSA-NEXT: v_mov_b32_e32 v9, s27
@@ -8510,17 +8531,15 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: v_mov_b32_e32 v11, s47
; GFX7-HSA-NEXT: v_mov_b32_e32 v12, s24
; GFX7-HSA-NEXT: v_mov_b32_e32 v13, s25
-; GFX7-HSA-NEXT: v_mov_b32_e32 v21, s23
-; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s20
-; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s21
+; GFX7-HSA-NEXT: v_mov_b32_e32 v14, s22
+; GFX7-HSA-NEXT: v_mov_b32_e32 v15, s23
; GFX7-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GFX7-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11]
; GFX7-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15]
-; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s18
-; GFX7-HSA-NEXT: s_add_u32 s18, s8, 0xb0
-; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s19
; GFX7-HSA-NEXT: s_addc_u32 s19, s9, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s18
+; GFX7-HSA-NEXT: v_mov_b32_e32 v0, s20
+; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s21
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s46
; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s45
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s19
@@ -8542,7 +8561,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: s_addc_u32 s13, s9, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s12
; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s44
-; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s41
+; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s43
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s13
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT: s_nop 0
@@ -8561,7 +8580,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX7-HSA-NEXT: v_mov_b32_e32 v1, s5
; GFX7-HSA-NEXT: s_addc_u32 s5, s9, 0
; GFX7-HSA-NEXT: v_mov_b32_e32 v4, s4
-; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s37
+; GFX7-HSA-NEXT: v_mov_b32_e32 v2, s42
; GFX7-HSA-NEXT: v_mov_b32_e32 v3, s33
; GFX7-HSA-NEXT: v_mov_b32_e32 v5, s5
; GFX7-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -8580,95 +8599,101 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64:
; GFX8-NOHSA: ; %bb.0:
; GFX8-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr24_sgpr25
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr14_sgpr15
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0
+; GFX8-NOHSA-NEXT: ; implicit-def: $sgpr10_sgpr11
; GFX8-NOHSA-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NOHSA-NEXT: s_lshr_b32 s46, s6, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s48, s6, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s50, s6, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s52, s4, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s54, s4, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s56, s4, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s58, s2, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s60, s2, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s40, s2, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s40, s6, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s42, s6, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s46, s6, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s48, s4, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s52, s4, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s54, s4, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s56, s2, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s58, s2, 24
+; GFX8-NOHSA-NEXT: s_lshr_b32 s44, s2, 8
; GFX8-NOHSA-NEXT: s_lshr_b32 s36, s0, 16
; GFX8-NOHSA-NEXT: s_lshr_b32 s34, s0, 24
-; GFX8-NOHSA-NEXT: s_lshr_b32 s28, s0, 8
-; GFX8-NOHSA-NEXT: s_lshr_b32 s62, s7, 16
-; GFX8-NOHSA-NEXT: s_lshr_b32 s24, s7, 8
-; GFX8-NOHSA-NEXT: s_mov_b32 s22, s7
-; GFX8-NOHSA-NEXT: s_lshr_b32 s64, s5, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s30, s0, 8
+; GFX8-NOHSA-NEXT: s_lshr_b32 s60, s7, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s26, s7, 8
+; GFX8-NOHSA-NEXT: s_mov_b32 s24, s7
+; GFX8-NOHSA-NEXT: s_lshr_b32 s62, s5, 16
; GFX8-NOHSA-NEXT: s_lshr_b32 s20, s5, 8
; GFX8-NOHSA-NEXT: s_mov_b32 s18, s5
-; GFX8-NOHSA-NEXT: s_lshr_b32 s66, s3, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s50, s3, 16
; GFX8-NOHSA-NEXT: s_lshr_b32 s16, s3, 8
; GFX8-NOHSA-NEXT: s_mov_b32 s14, s3
-; GFX8-NOHSA-NEXT: s_lshr_b32 s44, s1, 16
+; GFX8-NOHSA-NEXT: s_lshr_b32 s64, s1, 16
; GFX8-NOHSA-NEXT: s_lshr_b32 s12, s1, 8
; GFX8-NOHSA-NEXT: s_mov_b32 s10, s1
-; GFX8-NOHSA-NEXT: s_ashr_i32 s63, s5, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[26:27], s[0:1], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x80000
+; GFX8-NOHSA-NEXT: s_ashr_i32 s41, s1, 24
+; GFX8-NOHSA-NEXT: s_ashr_i32 s43, s3, 31
+; GFX8-NOHSA-NEXT: s_ashr_i32 s47, s3, 24
+; GFX8-NOHSA-NEXT: s_ashr_i32 s49, s5, 31
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[22:23], s[0:1], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[28:29], s[2:3], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[38:39], s[4:5], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[68:69], s[6:7], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[66:67], s[6:7], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GFX8-NOHSA-NEXT: s_ashr_i32 s33, s1, 31
-; GFX8-NOHSA-NEXT: s_ashr_i32 s42, s1, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[0:1], s[44:45], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[0:1], s[64:65], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
-; GFX8-NOHSA-NEXT: s_ashr_i32 s43, s3, 31
-; GFX8-NOHSA-NEXT: s_ashr_i32 s44, s3, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[66:67], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[2:3], s[50:51], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX8-NOHSA-NEXT: s_ashr_i32 s45, s5, 31
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[64:65], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
+; GFX8-NOHSA-NEXT: s_ashr_i32 s50, s5, 24
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[4:5], s[62:63], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GFX8-NOHSA-NEXT: s_ashr_i32 s64, s7, 31
-; GFX8-NOHSA-NEXT: s_ashr_i32 s65, s7, 24
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[62:63], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GFX8-NOHSA-NEXT: s_ashr_i32 s51, s7, 31
+; GFX8-NOHSA-NEXT: s_ashr_i32 s70, s7, 24
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[6:7], s[60:61], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
; GFX8-NOHSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
-; GFX8-NOHSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s46
-; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xd0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s47
-; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s48
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s49
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47
-; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0xc0
-; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[60:61], s[48:49], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[62:63], s[46:47], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[64:65], s[42:43], 0x80000
+; GFX8-NOHSA-NEXT: s_bfe_i64 s[68:69], s[40:41], 0x80000
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s64
+; GFX8-NOHSA-NEXT: s_add_u32 s64, s8, 0xd0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s65
+; GFX8-NOHSA-NEXT: s_addc_u32 s65, s9, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s64
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s68
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s69
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s50
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s51
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47
-; GFX8-NOHSA-NEXT: s_add_u32 s46, s8, 0x90
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s65
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_addc_u32 s47, s9, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s46
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s52
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s53
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s54
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s55
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s47
+; GFX8-NOHSA-NEXT: s_nop 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s62
+; GFX8-NOHSA-NEXT: s_add_u32 s62, s8, 0xc0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s63
+; GFX8-NOHSA-NEXT: s_addc_u32 s63, s9, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s62
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s66
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s67
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s63
+; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GFX8-NOHSA-NEXT: s_nop 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s52
+; GFX8-NOHSA-NEXT: s_add_u32 s52, s8, 0x90
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s53
+; GFX8-NOHSA-NEXT: s_addc_u32 s53, s9, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s52
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s60
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s61
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s53
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_nop 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s38
@@ -8676,43 +8701,43 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s39
; GFX8-NOHSA-NEXT: s_addc_u32 s39, s9, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s38
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s56
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s57
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s54
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s55
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s39
; GFX8-NOHSA-NEXT: s_add_u32 s38, s8, 0x50
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_addc_u32 s39, s9, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s38
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s58
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s59
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s60
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s61
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s56
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s57
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s58
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s59
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s39
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_nop 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s30
-; GFX8-NOHSA-NEXT: s_add_u32 s30, s8, 64
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s31
-; GFX8-NOHSA-NEXT: s_addc_u32 s31, s9, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s30
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s40
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s41
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s31
-; GFX8-NOHSA-NEXT: s_add_u32 s30, s8, 16
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s28
+; GFX8-NOHSA-NEXT: s_add_u32 s28, s8, 64
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s29
+; GFX8-NOHSA-NEXT: s_addc_u32 s29, s9, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s28
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s44
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s45
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s29
+; GFX8-NOHSA-NEXT: s_add_u32 s28, s8, 16
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NOHSA-NEXT: s_addc_u32 s31, s9, 0
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s30
+; GFX8-NOHSA-NEXT: s_addc_u32 s29, s9, 0
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s28
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s36
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s37
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s34
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s35
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s31
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s29
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s8
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s26
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s27
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s28
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s29
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s22
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s30
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s9
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_nop 0
@@ -8721,17 +8746,17 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s7
; GFX8-NOHSA-NEXT: s_addc_u32 s7, s9, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s65
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s64
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s70
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s51
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NOHSA-NEXT: s_add_u32 s6, s8, 0xe0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_addc_u32 s7, s9, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s6
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s22
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s23
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s24
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s25
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v0, s24
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s25
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s26
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s27
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT: s_nop 0
@@ -8740,8 +8765,8 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NOHSA-NEXT: s_addc_u32 s5, s9, 0
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s4
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s63
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s45
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s50
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s49
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NOHSA-NEXT: s_add_u32 s4, s8, 0xa0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -8761,7 +8786,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NOHSA-NEXT: s_add_u32 s2, s8, 0x60
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s44
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s47
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s43
; GFX8-NOHSA-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -8780,7 +8805,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NOHSA-NEXT: s_add_u32 s0, s8, 32
-; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s42
+; GFX8-NOHSA-NEXT: v_mov_b32_e32 v2, s41
; GFX8-NOHSA-NEXT: v_mov_b32_e32 v3, s33
; GFX8-NOHSA-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NOHSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -8985,88 +9010,95 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX12-LABEL: constant_sextload_v32i8_to_v32i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
+; GFX12-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GFX12-NEXT: ; implicit-def: $sgpr12_sgpr13
+; GFX12-NEXT: ; implicit-def: $sgpr24_sgpr25
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
+; GFX12-NEXT: ; implicit-def: $sgpr10_sgpr11
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshr_b32 s34, s6, 16
-; GFX12-NEXT: s_lshr_b32 s36, s6, 24
-; GFX12-NEXT: s_lshr_b32 s38, s6, 8
-; GFX12-NEXT: s_lshr_b32 s40, s4, 16
-; GFX12-NEXT: s_lshr_b32 s42, s4, 24
-; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
+; GFX12-NEXT: s_lshr_b32 s36, s6, 16
+; GFX12-NEXT: s_lshr_b32 s38, s6, 24
+; GFX12-NEXT: s_lshr_b32 s40, s6, 8
+; GFX12-NEXT: s_lshr_b32 s42, s4, 16
+; GFX12-NEXT: s_lshr_b32 s44, s4, 24
; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GFX12-NEXT: s_lshr_b32 s44, s4, 8
-; GFX12-NEXT: s_bfe_i64 s[66:67], s[6:7], 0x80000
; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
-; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX12-NEXT: s_lshr_b32 s46, s4, 8
+; GFX12-NEXT: s_bfe_i64 s[66:67], s[6:7], 0x80000
; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s37
-; GFX12-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v5, s67
-; GFX12-NEXT: s_lshr_b32 s28, s2, 16
-; GFX12-NEXT: s_lshr_b32 s46, s2, 24
-; GFX12-NEXT: s_bfe_i64 s[64:65], s[4:5], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s37
; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v4, s66 :: v_dual_mov_b32 v7, s39
-; GFX12-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v9, s41
-; GFX12-NEXT: s_lshr_b32 s48, s2, 8
-; GFX12-NEXT: v_dual_mov_b32 v8, s40 :: v_dual_mov_b32 v11, s43
-; GFX12-NEXT: v_dual_mov_b32 v10, s42 :: v_dual_mov_b32 v13, s65
-; GFX12-NEXT: s_lshr_b32 s50, s0, 16
-; GFX12-NEXT: s_lshr_b32 s52, s0, 24
+; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39
+; GFX12-NEXT: v_dual_mov_b32 v2, s38 :: v_dual_mov_b32 v5, s67
+; GFX12-NEXT: s_lshr_b32 s30, s2, 16
+; GFX12-NEXT: s_lshr_b32 s48, s2, 24
+; GFX12-NEXT: s_bfe_i64 s[64:65], s[4:5], 0x80000
; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v12, s64 :: v_dual_mov_b32 v15, s45
-; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GFX12-NEXT: v_mov_b32_e32 v14, s44
-; GFX12-NEXT: s_lshr_b32 s54, s0, 8
-; GFX12-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v4, s66 :: v_dual_mov_b32 v7, s41
+; GFX12-NEXT: v_dual_mov_b32 v6, s40 :: v_dual_mov_b32 v9, s43
+; GFX12-NEXT: s_lshr_b32 s50, s2, 8
+; GFX12-NEXT: v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45
+; GFX12-NEXT: v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s65
+; GFX12-NEXT: s_lshr_b32 s52, s0, 16
+; GFX12-NEXT: s_lshr_b32 s54, s0, 24
; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v12, s64 :: v_dual_mov_b32 v15, s47
+; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
+; GFX12-NEXT: v_mov_b32_e32 v14, s46
+; GFX12-NEXT: s_lshr_b32 s34, s0, 8
+; GFX12-NEXT: s_bfe_i64 s[28:29], s[2:3], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
; GFX12-NEXT: s_lshr_b32 s56, s7, 16
; GFX12-NEXT: s_lshr_b32 s58, s5, 16
-; GFX12-NEXT: s_lshr_b32 s60, s1, 8
-; GFX12-NEXT: s_mov_b32 s62, s1
+; GFX12-NEXT: s_lshr_b32 s60, s5, 8
+; GFX12-NEXT: s_mov_b32 s18, s5
+; GFX12-NEXT: s_lshr_b32 s20, s3, 8
+; GFX12-NEXT: s_mov_b32 s12, s3
+; GFX12-NEXT: s_lshr_b32 s14, s1, 16
+; GFX12-NEXT: s_lshr_b32 s62, s1, 8
+; GFX12-NEXT: s_mov_b32 s10, s1
; GFX12-NEXT: s_ashr_i32 s57, s1, 24
; GFX12-NEXT: s_ashr_i32 s59, s3, 31
; GFX12-NEXT: s_ashr_i32 s61, s3, 24
; GFX12-NEXT: s_ashr_i32 s63, s5, 31
+; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:208
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:192
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:144
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:128
-; GFX12-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v3, s47
-; GFX12-NEXT: v_dual_mov_b32 v1, s29 :: v_dual_mov_b32 v2, s46
-; GFX12-NEXT: v_mov_b32_e32 v5, s31
+; GFX12-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v3, s49
+; GFX12-NEXT: v_dual_mov_b32 v1, s31 :: v_dual_mov_b32 v2, s48
+; GFX12-NEXT: v_mov_b32_e32 v5, s29
; GFX12-NEXT: s_lshr_b32 s26, s7, 8
; GFX12-NEXT: s_mov_b32 s24, s7
; GFX12-NEXT: s_bfe_i64 s[22:23], s[0:1], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v7, s49
-; GFX12-NEXT: v_dual_mov_b32 v6, s48 :: v_dual_mov_b32 v9, s51
-; GFX12-NEXT: s_lshr_b32 s18, s5, 8
-; GFX12-NEXT: s_mov_b32 s20, s5
+; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v4, s28 :: v_dual_mov_b32 v7, s51
+; GFX12-NEXT: v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s53
; GFX12-NEXT: s_lshr_b32 s16, s3, 16
-; GFX12-NEXT: s_lshr_b32 s12, s3, 8
-; GFX12-NEXT: s_mov_b32 s14, s3
-; GFX12-NEXT: s_lshr_b32 s10, s1, 16
; GFX12-NEXT: s_ashr_i32 s33, s1, 31
-; GFX12-NEXT: s_bfe_i64 s[2:3], s[62:63], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[0:1], s[60:61], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[2:3], s[10:11], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[0:1], s[62:63], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[10:11], s[14:15], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[14:15], s[12:13], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[12:13], s[20:21], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[18:19], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[18:19], s[60:61], 0x80000
; GFX12-NEXT: s_ashr_i32 s60, s5, 24
; GFX12-NEXT: s_bfe_i64 s[4:5], s[58:59], 0x80000
; GFX12-NEXT: s_ashr_i32 s58, s7, 31
; GFX12-NEXT: s_ashr_i32 s62, s7, 24
; GFX12-NEXT: s_bfe_i64 s[6:7], s[56:57], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v8, s50 :: v_dual_mov_b32 v11, s53
-; GFX12-NEXT: v_dual_mov_b32 v10, s52 :: v_dual_mov_b32 v13, s23
+; GFX12-NEXT: v_dual_mov_b32 v8, s52 :: v_dual_mov_b32 v11, s55
+; GFX12-NEXT: v_dual_mov_b32 v10, s54 :: v_dual_mov_b32 v13, s23
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v12, s22 :: v_dual_mov_b32 v15, s55
-; GFX12-NEXT: v_dual_mov_b32 v14, s54 :: v_dual_mov_b32 v17, s7
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v12, s22 :: v_dual_mov_b32 v15, s35
+; GFX12-NEXT: v_dual_mov_b32 v14, s34 :: v_dual_mov_b32 v17, s7
; GFX12-NEXT: v_dual_mov_b32 v16, s6 :: v_dual_mov_b32 v19, s58
; GFX12-NEXT: v_dual_mov_b32 v18, s62 :: v_dual_mov_b32 v21, s25
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
@@ -9081,20 +9113,16 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9] offset:224
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s63
; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s60
-; GFX12-NEXT: v_mov_b32_e32 v5, s21
-; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s19
-; GFX12-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v9, s17
-; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v11, s59
-; GFX12-NEXT: v_dual_mov_b32 v10, s61 :: v_dual_mov_b32 v13, s15
-; GFX12-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v15, s13
-; GFX12-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v17, s11
-; GFX12-NEXT: v_dual_mov_b32 v16, s10 :: v_dual_mov_b32 v19, s33
-; GFX12-NEXT: v_dual_mov_b32 v18, s57 :: v_dual_mov_b32 v21, s3
-; GFX12-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v23, s1
-; GFX12-NEXT: v_mov_b32_e32 v22, s0
+; GFX12-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v4, s20
+; GFX12-NEXT: v_dual_mov_b32 v7, s19 :: v_dual_mov_b32 v6, s18
+; GFX12-NEXT: v_dual_mov_b32 v9, s17 :: v_dual_mov_b32 v8, s16
+; GFX12-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v10, s61
+; GFX12-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v12, s14
+; GFX12-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
+; GFX12-NEXT: v_dual_mov_b32 v17, s11 :: v_dual_mov_b32 v16, s10
+; GFX12-NEXT: v_dual_mov_b32 v19, s33 :: v_dual_mov_b32 v18, s57
+; GFX12-NEXT: v_dual_mov_b32 v21, s3 :: v_dual_mov_b32 v20, s2
+; GFX12-NEXT: v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:160
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 09d3c3b01b809..c374c78c70ceb 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -5982,6 +5982,7 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out,
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr2_vgpr3
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
@@ -6005,6 +6006,7 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out,
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $vgpr2_vgpr3
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -6041,6 +6043,7 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out,
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr3_vgpr4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
@@ -6359,6 +6362,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
@@ -6393,6 +6397,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -6448,6 +6453,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
@@ -7010,6 +7016,8 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v1, 0, 16
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr28_vgpr29
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr28_vgpr29
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
@@ -7036,6 +7044,8 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GCN-HSA-NEXT: ; implicit-def: $vgpr8_vgpr9
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -7135,62 +7145,64 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v1, 0, 16
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v9
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v9, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v3
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v6
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v7
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v8
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v5, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v6
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v7
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v5, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v6, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v9, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v11, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v13, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v3, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v10
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v11
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v10, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v8, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v6, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v3, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v5, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v13, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v7, 0, 16
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v22, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v11, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v20, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr15_vgpr16
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr15_vgpr16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:96
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: global_sextload_v16i16_to_v16i64:
@@ -8119,7 +8131,7 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v14
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v15
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v12
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
@@ -8129,7 +8141,7 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v7
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v4
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v10
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v10
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v23, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v15
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v15
@@ -8177,7 +8189,7 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v14, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v21, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v20, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v19, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v8, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
@@ -8192,31 +8204,35 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:192
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v4, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v6, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v19, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v6, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v20, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v18, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v17, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v16, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v0, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v17, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v16, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v0, 0, 16
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v0, 16, v2
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v2, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v0, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v27, v2, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v29, v0, 0, 16
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $vgpr16_vgpr17
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v24
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 31, v28
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 31, v26
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 31, v30
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:96
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:64
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
@@ -8229,6 +8245,10 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-HSA-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-HSA-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GCN-HSA-NEXT: ; implicit-def: $vgpr16_vgpr17
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -8512,6 +8532,7 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
@@ -8520,6 +8541,9 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:64
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index f879dc660203f..8d1ed92ab3c11 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -5868,17 +5868,17 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(ptr addrspace(1) %out,
define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GCN-NOHSA-SI-LABEL: global_sextload_v4i8_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
-; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
+; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v4, 24, v0
@@ -5891,8 +5891,8 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out,
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: global_sextload_v4i8_to_v4i64:
@@ -6931,77 +6931,79 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr6_sgpr7
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr8_sgpr9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s4, v2
; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s5, v3
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s6, v0
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s7, v1
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s4, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s4, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s6, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s6, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s6, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s5, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s5, 8
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s26, s5
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s7, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s7, 8
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s10, v0
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s11, v1
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s4, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s4, 8
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s10, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s10, 8
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s5, 8
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s11, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s11, 8
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s11
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[10:11], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[4:5], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s7, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s7, 24
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s5, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s5, 24
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s11, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s11, 24
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[38:39], s[6:7], 0x80000
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s5, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s5, 24
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s38
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s33
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s26
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s27
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s12
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s13
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s14
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s15
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s33
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s38
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s39
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s14
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s15
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s17
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s31
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s29
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s17
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s29
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
-; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s24
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s25
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s12
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s13
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s10
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s11
@@ -7021,46 +7023,50 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN-HSA-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
+; GCN-HSA-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
-; GCN-HSA-NEXT: v_readfirstlane_b32 s2, v2
-; GCN-HSA-NEXT: v_readfirstlane_b32 s3, v3
-; GCN-HSA-NEXT: v_readfirstlane_b32 s4, v0
-; GCN-HSA-NEXT: v_readfirstlane_b32 s5, v1
-; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16
-; GCN-HSA-NEXT: s_lshr_b32 s8, s2, 24
-; GCN-HSA-NEXT: s_lshr_b32 s10, s2, 8
-; GCN-HSA-NEXT: s_lshr_b32 s18, s3, 16
-; GCN-HSA-NEXT: s_lshr_b32 s20, s3, 8
-; GCN-HSA-NEXT: s_mov_b32 s22, s3
-; GCN-HSA-NEXT: s_ashr_i32 s7, s3, 31
-; GCN-HSA-NEXT: s_ashr_i32 s9, s3, 24
-; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16
-; GCN-HSA-NEXT: s_lshr_b32 s14, s4, 24
-; GCN-HSA-NEXT: s_lshr_b32 s16, s4, 8
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
-; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000
-; GCN-HSA-NEXT: s_ashr_i32 s4, s5, 24
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
-; GCN-HSA-NEXT: s_lshr_b32 s2, s5, 16
-; GCN-HSA-NEXT: s_ashr_i32 s3, s5, 31
+; GCN-HSA-NEXT: v_readfirstlane_b32 s6, v2
+; GCN-HSA-NEXT: v_readfirstlane_b32 s7, v3
+; GCN-HSA-NEXT: v_readfirstlane_b32 s8, v0
+; GCN-HSA-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x80000
+; GCN-HSA-NEXT: s_ashr_i32 s4, s7, 31
+; GCN-HSA-NEXT: s_lshr_b32 s16, s8, 16
+; GCN-HSA-NEXT: s_lshr_b32 s18, s8, 24
+; GCN-HSA-NEXT: s_lshr_b32 s20, s8, 8
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25
+; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[8:9], 0x80000
+; GCN-HSA-NEXT: s_ashr_i32 s8, s7, 24
+; GCN-HSA-NEXT: v_mov_b32_e32 v7, s4
+; GCN-HSA-NEXT: s_ashr_i32 s4, s9, 24
+; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16
+; GCN-HSA-NEXT: s_lshr_b32 s12, s6, 24
+; GCN-HSA-NEXT: s_lshr_b32 s14, s6, 8
+; GCN-HSA-NEXT: s_lshr_b32 s22, s7, 16
+; GCN-HSA-NEXT: s_mov_b32 s2, s7
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24
+; GCN-HSA-NEXT: s_lshr_b32 s6, s7, 8
+; GCN-HSA-NEXT: v_mov_b32_e32 v6, s8
+; GCN-HSA-NEXT: s_ashr_i32 s7, s9, 31
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4
-; GCN-HSA-NEXT: s_lshr_b32 s4, s5, 8
-; GCN-HSA-NEXT: s_mov_b32 s24, s5
-; GCN-HSA-NEXT: v_mov_b32_e32 v6, s9
-; GCN-HSA-NEXT: v_mov_b32_e32 v7, s7
-; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
-; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GCN-HSA-NEXT: s_lshr_b32 s8, s9, 16
+; GCN-HSA-NEXT: s_mov_b32 s4, s9
+; GCN-HSA-NEXT: s_lshr_b32 s24, s9, 8
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s7
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
@@ -7068,66 +7074,64 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s6
-; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x50
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s20
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s21
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s10
+; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x50
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[2:5]
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s7
-; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7
-; GCN-HSA-NEXT: s_add_u32 s6, s0, 64
-; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v9, s7
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
-; GCN-HSA-NEXT: v_mov_b32_e32 v8, s6
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s11
+; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
+; GCN-HSA-NEXT: s_add_u32 s10, s0, 64
+; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15
+; GCN-HSA-NEXT: v_mov_b32_e32 v9, s11
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
-; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s12
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
-; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s8
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s9
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s12
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s13
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s14
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s15
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s16
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s17
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s18
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s19
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s18
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s19
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s22
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s23
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[4:7]
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s21
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v8, s8
+; GCN-HSA-NEXT: v_mov_b32_e32 v9, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s24
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s25
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s4
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s5
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s24
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s25
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
; GCN-HSA-NEXT: s_endpgm
@@ -7144,65 +7148,67 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr10_sgpr11
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s4, v2
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s6, v0
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s4, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s4, 24
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s8, v0
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s4, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s4, 24
; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s5, v3
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s7, v1
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s4, 8
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s6, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s6, 24
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s6, 8
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s4, 8
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s8, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s8, 24
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s8, 8
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s5, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 8
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, s5
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s7, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s7, 8
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s30, s7
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s5, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s5, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s5
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s9, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s9, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s9
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[4:5], 0x80000
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s7, 31
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s7, 24
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s9, 31
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s9, 24
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s14
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s15
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s17
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s16
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s17
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s18
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s19
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s5, 31
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s5, 24
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[12:13], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[10:11], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[28:29], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s34
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s35
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, s20
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, s21
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, s22
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, s23
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s24
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s25
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, s22
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, s23
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, s24
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, s25
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s26
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s27
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, s38
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, s33
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, s31
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, s29
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s26
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s27
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s14
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s15
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v12, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
@@ -8173,168 +8179,172 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr26_sgpr27
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr6_sgpr7
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GCN-NOHSA-SI-NEXT: ; implicit-def: $sgpr10_sgpr11
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s18, v2
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s19, v3
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s22, v0
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s23, v1
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s20, v2
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s21, v3
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s24, v0
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s25, v1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s24, v6
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s25, v7
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s12, v4
-; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s13, v5
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s18, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s18, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s18, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s22, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s22, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s22, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s24, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s24, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s24, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s12, 16
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s12, 24
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s12, 8
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s19, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[12:13], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[18:19], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], s[22:23], 0x80000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s42
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s43
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[24:25], 0x80000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s44
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s45
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s19, 8
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s42
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s43
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s19
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s40
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s41
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s23, 16
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s38
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s39
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s23, 8
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s36
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s37
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s22, s23
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[12:13], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s22, v6
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s23, v7
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s18, v4
+; GCN-NOHSA-SI-NEXT: v_readfirstlane_b32 s19, v5
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s20, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s20, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s20, 8
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s24, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s24, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s24, 8
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s22, 16
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s22, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s22, 8
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s18, 16
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[20:21], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[24:25], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[22:23], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s36
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s37
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[18:19], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s34
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s35
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s18, 24
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s36
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s37
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s18, 8
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[26:27], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s30
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s31
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s21, 16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s34
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s35
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s21, 8
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s26, s21
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x80000
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:208
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s36
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s37
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s25, 16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s34
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s25, 8
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s26
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s27
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s25, 16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s30
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s31
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s25, 8
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s25
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x80000
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s25, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s23, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s41, s23, 24
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s5, s19, 31
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s7, s19, 24
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s44, s25, 24
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s25
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s23, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s23
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s28
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s29
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s13, 16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s26
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s27
-; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s13, 8
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s23, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s25, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s25, 24
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s5, s21, 31
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s21, 24
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s23, 24
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x80000
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s23, 8
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s28
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s29
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s19, 16
+; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s19
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s13, 31
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s19, 31
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5
-; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s13, 24
-; GCN-NOHSA-SI-NEXT: s_mov_b32 s30, s13
+; GCN-NOHSA-SI-NEXT: s_ashr_i32 s47, s19, 24
+; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s19, 8
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[10:11], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[8:9], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[44:45], s[6:7], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[38:39], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[28:29], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[36:37], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[34:35], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[12:13], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[42:43], s[22:23], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[26:27], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[28:29], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[36:37], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[38:39], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[40:41], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
+; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:128
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s4
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s5
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s6
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s7
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s12
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s13
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s41
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s39
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s8
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s9
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s37
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s10
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s11
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s14
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s15
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s20
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s21
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s42
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s43
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s16
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s17
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s44
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s45
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s18
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s19
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s44
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s39
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s33
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s22
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s18
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s19
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s42
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s43
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s24
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s25
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s46
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s45
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s36
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s47
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s46
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s27
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s30
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s31
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s28
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s29
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s40
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s41
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s26
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s27
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s28
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s29
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s22
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s23
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s8
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s9
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s12
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s13
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s6
+; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s7
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: s_endpgm
;
@@ -8344,6 +8354,9 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: s_add_i32 s12, s12, s17
; GCN-HSA-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-HSA-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GCN-HSA-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GCN-HSA-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GCN-HSA-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
@@ -8353,89 +8366,90 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
+; GCN-HSA-NEXT: ; implicit-def: $sgpr2_sgpr3
; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
-; GCN-HSA-NEXT: v_readfirstlane_b32 s6, v6
-; GCN-HSA-NEXT: v_readfirstlane_b32 s8, v4
-; GCN-HSA-NEXT: v_readfirstlane_b32 s9, v5
-; GCN-HSA-NEXT: v_readfirstlane_b32 s7, v7
-; GCN-HSA-NEXT: s_lshr_b32 s20, s6, 16
-; GCN-HSA-NEXT: s_lshr_b32 s18, s6, 24
-; GCN-HSA-NEXT: s_lshr_b32 s10, s8, 16
-; GCN-HSA-NEXT: s_lshr_b32 s2, s8, 24
-; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x80000
-; GCN-HSA-NEXT: s_lshr_b32 s16, s6, 8
-; GCN-HSA-NEXT: s_lshr_b32 s4, s8, 8
-; GCN-HSA-NEXT: s_lshr_b32 s12, s7, 16
-; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 8
-; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x80000
-; GCN-HSA-NEXT: s_lshr_b32 s6, s9, 16
-; GCN-HSA-NEXT: s_mov_b32 s28, s9
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s26
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s27
-; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[18:19], 0x80000
+; GCN-HSA-NEXT: v_readfirstlane_b32 s10, v6
+; GCN-HSA-NEXT: v_readfirstlane_b32 s12, v4
+; GCN-HSA-NEXT: v_readfirstlane_b32 s11, v7
+; GCN-HSA-NEXT: v_readfirstlane_b32 s13, v5
+; GCN-HSA-NEXT: s_lshr_b32 s2, s10, 16
+; GCN-HSA-NEXT: s_lshr_b32 s8, s10, 24
+; GCN-HSA-NEXT: s_lshr_b32 s14, s10, 8
+; GCN-HSA-NEXT: s_lshr_b32 s16, s12, 16
+; GCN-HSA-NEXT: s_lshr_b32 s18, s12, 24
+; GCN-HSA-NEXT: s_lshr_b32 s26, s12, 8
+; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[10:11], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[12:13], 0x80000
+; GCN-HSA-NEXT: s_lshr_b32 s28, s11, 8
+; GCN-HSA-NEXT: s_mov_b32 s4, s11
+; GCN-HSA-NEXT: s_bfe_i64 s[40:41], s[2:3], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[42:43], s[8:9], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[44:45], s[16:17], 0x80000
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
-; GCN-HSA-NEXT: v_readfirstlane_b32 s40, v2
-; GCN-HSA-NEXT: v_readfirstlane_b32 s41, v3
-; GCN-HSA-NEXT: s_bfe_i64 s[42:43], s[10:11], 0x80000
-; GCN-HSA-NEXT: v_readfirstlane_b32 s44, v0
-; GCN-HSA-NEXT: v_readfirstlane_b32 s45, v1
-; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GCN-HSA-NEXT: s_mov_b32 s22, s7
-; GCN-HSA-NEXT: s_lshr_b32 s8, s9, 8
-; GCN-HSA-NEXT: v_mov_b32_e32 v8, s24
-; GCN-HSA-NEXT: v_mov_b32_e32 v9, s25
-; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[16:17], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[28:29], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[14:15], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[12:13], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[46:47], s[4:5], 0x80000
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s20
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s26
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s27
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s42
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
-; GCN-HSA-NEXT: s_lshr_b32 s42, s44, 16
-; GCN-HSA-NEXT: s_lshr_b32 s48, s44, 24
-; GCN-HSA-NEXT: s_lshr_b32 s28, s44, 8
-; GCN-HSA-NEXT: s_lshr_b32 s6, s45, 16
-; GCN-HSA-NEXT: s_lshr_b32 s2, s45, 8
-; GCN-HSA-NEXT: s_mov_b32 s4, s45
-; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[44:45], 0x80000
-; GCN-HSA-NEXT: s_lshr_b32 s44, s40, 16
-; GCN-HSA-NEXT: s_lshr_b32 s50, s40, 24
-; GCN-HSA-NEXT: s_lshr_b32 s52, s40, 8
-; GCN-HSA-NEXT: s_lshr_b32 s20, s41, 16
-; GCN-HSA-NEXT: s_lshr_b32 s12, s41, 8
-; GCN-HSA-NEXT: s_mov_b32 s14, s41
-; GCN-HSA-NEXT: s_ashr_i32 s33, s9, 31
-; GCN-HSA-NEXT: s_ashr_i32 s37, s7, 31
-; GCN-HSA-NEXT: s_ashr_i32 s38, s7, 24
-; GCN-HSA-NEXT: s_ashr_i32 s34, s9, 24
-; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s21
-; GCN-HSA-NEXT: v_mov_b32_e32 v10, s30
-; GCN-HSA-NEXT: v_mov_b32_e32 v11, s31
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s43
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
-; GCN-HSA-NEXT: s_ashr_i32 s30, s45, 31
-; GCN-HSA-NEXT: s_ashr_i32 s31, s45, 24
-; GCN-HSA-NEXT: s_ashr_i32 s35, s41, 31
-; GCN-HSA-NEXT: s_ashr_i32 s36, s41, 24
-; GCN-HSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
+; GCN-HSA-NEXT: v_readfirstlane_b32 s46, v2
+; GCN-HSA-NEXT: v_readfirstlane_b32 s47, v3
+; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
+; GCN-HSA-NEXT: v_readfirstlane_b32 s48, v0
+; GCN-HSA-NEXT: v_readfirstlane_b32 s49, v1
+; GCN-HSA-NEXT: s_lshr_b32 s24, s11, 16
+; GCN-HSA-NEXT: s_lshr_b32 s30, s13, 16
+; GCN-HSA-NEXT: s_mov_b32 s6, s13
+; GCN-HSA-NEXT: s_lshr_b32 s10, s13, 8
+; GCN-HSA-NEXT: v_mov_b32_e32 v8, s20
+; GCN-HSA-NEXT: v_mov_b32_e32 v9, s21
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s22
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s23
+; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[28:29], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[26:27], 0x80000
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s40
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s42
+; GCN-HSA-NEXT: v_mov_b32_e32 v10, s14
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18
+; GCN-HSA-NEXT: s_mov_b32 s2, s47
+; GCN-HSA-NEXT: s_mov_b32 s8, s49
+; GCN-HSA-NEXT: s_lshr_b32 s40, s48, 16
+; GCN-HSA-NEXT: s_lshr_b32 s42, s48, 24
+; GCN-HSA-NEXT: s_lshr_b32 s28, s48, 8
+; GCN-HSA-NEXT: s_lshr_b32 s14, s49, 16
+; GCN-HSA-NEXT: s_lshr_b32 s4, s49, 8
+; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[48:49], 0x80000
+; GCN-HSA-NEXT: s_lshr_b32 s44, s46, 16
+; GCN-HSA-NEXT: s_lshr_b32 s48, s46, 24
+; GCN-HSA-NEXT: s_lshr_b32 s52, s46, 8
+; GCN-HSA-NEXT: s_lshr_b32 s18, s47, 16
+; GCN-HSA-NEXT: s_lshr_b32 s54, s47, 8
+; GCN-HSA-NEXT: s_ashr_i32 s37, s11, 31
+; GCN-HSA-NEXT: s_ashr_i32 s38, s11, 24
+; GCN-HSA-NEXT: s_ashr_i32 s33, s13, 31
+; GCN-HSA-NEXT: s_ashr_i32 s34, s13, 24
+; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[6:7], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[30:31], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s41
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s43
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s45
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s19
+; GCN-HSA-NEXT: s_ashr_i32 s30, s49, 31
+; GCN-HSA-NEXT: s_ashr_i32 s31, s49, 24
+; GCN-HSA-NEXT: s_ashr_i32 s35, s47, 31
+; GCN-HSA-NEXT: s_ashr_i32 s36, s47, 24
+; GCN-HSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[8:9], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[14:15], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[2:3], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[54:55], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
-; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
+; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
; GCN-HSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000
; GCN-HSA-NEXT: s_add_u32 s54, s0, 0x50
; GCN-HSA-NEXT: s_addc_u32 s55, s1, 0
@@ -8447,19 +8461,20 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s55
; GCN-HSA-NEXT: s_add_u32 s54, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s55, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v20, s40
+; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
+; GCN-HSA-NEXT: v_mov_b32_e32 v12, s40
; GCN-HSA-NEXT: s_add_u32 s40, s0, 0xd0
-; GCN-HSA-NEXT: v_mov_b32_e32 v21, s41
+; GCN-HSA-NEXT: v_mov_b32_e32 v13, s41
; GCN-HSA-NEXT: s_addc_u32 s41, s1, 0
-; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
-; GCN-HSA-NEXT: v_mov_b32_e32 v28, s54
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s40
+; GCN-HSA-NEXT: v_mov_b32_e32 v28, s54
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s41
; GCN-HSA-NEXT: s_add_u32 s40, s0, 0xc0
; GCN-HSA-NEXT: v_mov_b32_e32 v29, s55
; GCN-HSA-NEXT: s_addc_u32 s41, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[0:3]
-; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15]
+; GCN-HSA-NEXT: v_mov_b32_e32 v6, s50
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26
; GCN-HSA-NEXT: s_add_u32 s26, s0, 0x90
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27
@@ -8468,8 +8483,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: v_mov_b32_e32 v25, s27
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT: s_add_u32 s26, s0, 0x80
-; GCN-HSA-NEXT: v_mov_b32_e32 v6, s46
-; GCN-HSA-NEXT: v_mov_b32_e32 v7, s47
+; GCN-HSA-NEXT: v_mov_b32_e32 v7, s51
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: s_addc_u32 s27, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
@@ -8477,37 +8491,37 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s24
; GCN-HSA-NEXT: s_add_u32 s24, s0, 0x70
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s45
-; GCN-HSA-NEXT: v_mov_b32_e32 v18, s50
-; GCN-HSA-NEXT: v_mov_b32_e32 v19, s51
+; GCN-HSA-NEXT: v_mov_b32_e32 v18, s48
+; GCN-HSA-NEXT: v_mov_b32_e32 v19, s49
; GCN-HSA-NEXT: v_mov_b32_e32 v10, s40
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s25
; GCN-HSA-NEXT: s_addc_u32 s25, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v20, s46
+; GCN-HSA-NEXT: v_mov_b32_e32 v21, s47
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s52
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s53
-; GCN-HSA-NEXT: v_mov_b32_e32 v12, s42
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, s43
; GCN-HSA-NEXT: v_mov_b32_e32 v11, s41
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s48
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s49
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s42
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s43
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s26
; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[16:19]
; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[20:23]
-; GCN-HSA-NEXT: v_mov_b32_e32 v10, s18
-; GCN-HSA-NEXT: s_add_u32 s18, s0, 0x60
+; GCN-HSA-NEXT: v_mov_b32_e32 v10, s20
+; GCN-HSA-NEXT: s_add_u32 s20, s0, 0x60
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s28
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s29
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s27
; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15]
-; GCN-HSA-NEXT: v_mov_b32_e32 v11, s19
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s21
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s24
-; GCN-HSA-NEXT: s_addc_u32 s19, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s18
+; GCN-HSA-NEXT: s_addc_u32 s21, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v14, s20
; GCN-HSA-NEXT: v_mov_b32_e32 v8, s22
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s38
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s37
; GCN-HSA-NEXT: v_mov_b32_e32 v9, s23
; GCN-HSA-NEXT: v_mov_b32_e32 v13, s25
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s19
+; GCN-HSA-NEXT: v_mov_b32_e32 v15, s21
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[0:3]
; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
@@ -8521,50 +8535,50 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_nop 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8
-; GCN-HSA-NEXT: s_add_u32 s8, s0, 32
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9
-; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9
-; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xf0
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10
+; GCN-HSA-NEXT: s_add_u32 s10, s0, 32
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11
+; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
+; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xf0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21
+; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9
-; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xe0
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8
+; GCN-HSA-NEXT: s_nop 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
+; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9
+; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GCN-HSA-NEXT: s_nop 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
-; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xb0
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7
-; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: s_add_u32 s0, s0, 0xa0
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7
+; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
-; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
-; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
-; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
-; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7
+; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4
+; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT: s_endpgm
@@ -8581,151 +8595,155 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr26_sgpr27
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr10_sgpr11
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
+; GCN-NOHSA-VI-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s4, v2
-; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s9, v7
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s11, v5
; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s5, v3
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s6, v0
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s10, v4
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s9, v1
+; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s13, v7
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s15, v5
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s8, v0
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s12, v6
+; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s14, v4
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s4, 16
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s4, 24
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s4, 8
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s9, 8
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s39, s11, 24
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s7, v1
-; GCN-NOHSA-VI-NEXT: v_readfirstlane_b32 s8, v6
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s6, 8
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s10, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s10, 24
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s10, 8
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s11, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s11, 8
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s66, s11
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[10:11], 0x80000
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s4, 8
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s5, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s5
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s9, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, s9
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s13, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s13, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s13
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s15, 16
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s39, s15, 24
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s13, 31
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s13, 24
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s8, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s8, 24
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s8, 8
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s12, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s12, 24
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s12, 8
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s14, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s14, 24
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s14, 8
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s9, 16
+; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s15, 8
+; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s15
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[14:15], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[12:13], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x80000
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[68:69], s[4:5], 0x80000
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s11, 31
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[60:61], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[38:39], 0x80000
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s8, 16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s8, 24
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s15, 31
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s9, 31
+; GCN-NOHSA-VI-NEXT: s_ashr_i32 s70, s9, 24
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[64:65], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[10:11], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[20:21], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[18:19], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[28:29], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[26:27], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[60:61], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[44:45], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[40:41], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[38:39], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s68
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s69
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s60
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s61
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s40
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s41
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s42
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s8, 8
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s64
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s65
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s62
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s63
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s60
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[8:9], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s48
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x80000
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x80000
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s5, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s54
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s5, 8
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s56
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s57
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s58
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s26, s5
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s71, s5, 31
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s42
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43
; GCN-NOHSA-VI-NEXT: s_ashr_i32 s72, s5, 24
; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s7, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s36
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s37
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s7, 8
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x80000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s72
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s71
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, s7
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s7, 31
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s70, s7, 24
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s9, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
-; GCN-NOHSA-VI-NEXT: s_mov_b32 s12, s9
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x80000
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s70
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s9, 31
-; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s9, 24
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17
+; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s18
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s6, 16
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s46
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s65
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63
-; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s6, 24
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[62:63], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s45
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[66:67], 0x80000
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s47
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[64:65], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
-; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, s44
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, s50
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s39
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, s45
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, s46
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, s47
+; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, s51
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index ddd1ce66c013a..76cd984eecd92 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -5736,6 +5736,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; SI-NEXT: v_mov_b32_e32 v0, s1
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
+; SI-NEXT: ; implicit-def: $vgpr2_vgpr3
; SI-NEXT: v_mov_b32_e32 v9, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v3, v1
@@ -5756,6 +5757,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; VI-NO-DS128: ; %bb.0:
; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NO-DS128-NEXT: s_mov_b32 m0, -1
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr2_vgpr3
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0
@@ -5778,6 +5780,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; GFX9-NO-DS128-LABEL: local_sextload_v4i16_to_v4i64:
; GFX9-NO-DS128: ; %bb.0:
; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0
@@ -5846,6 +5849,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; VI-DS128: ; %bb.0:
; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-DS128-NEXT: s_mov_b32 m0, -1
+; VI-DS128-NEXT: ; implicit-def: $vgpr2_vgpr3
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_mov_b32_e32 v0, s1
; VI-DS128-NEXT: ds_read_b64 v[0:1], v0
@@ -5869,6 +5873,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; GFX9-DS128-LABEL: local_sextload_v4i16_to_v4i64:
; GFX9-DS128: ; %bb.0:
; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1
; GFX9-DS128-NEXT: ds_read_b64 v[0:1], v0
@@ -6140,6 +6145,7 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; SI-NEXT: v_mov_b32_e32 v0, s1
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
+; SI-NEXT: ; implicit-def: $vgpr4_vgpr5
; SI-NEXT: v_mov_b32_e32 v16, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v9, v3
@@ -6171,6 +6177,7 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; VI-NO-DS128: ; %bb.0:
; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NO-DS128-NEXT: s_mov_b32 m0, -1
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
@@ -6206,6 +6213,7 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; GFX9-NO-DS128-LABEL: local_sextload_v8i16_to_v8i64:
; GFX9-NO-DS128: ; %bb.0:
; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
@@ -6335,17 +6343,18 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; VI-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2
-; VI-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
+; VI-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
+; VI-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
; VI-DS128-NEXT: v_mov_b32_e32 v0, v3
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; VI-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; VI-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -6370,17 +6379,18 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2
-; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v3
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; GFX9-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -6816,6 +6826,8 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read2_b64 v[0:3], v4 offset0:2 offset1:3
; SI-NEXT: ds_read2_b64 v[4:7], v4 offset1:1
+; SI-NEXT: ; implicit-def: $vgpr8_vgpr9
+; SI-NEXT: ; implicit-def: $vgpr8_vgpr9
; SI-NEXT: v_mov_b32_e32 v18, s0
; SI-NEXT: s_waitcnt lgkmcnt(1)
; SI-NEXT: v_mov_b32_e32 v12, v3
@@ -6874,60 +6886,62 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1
; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1
-; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_read2_b64 v[8:11], v4 offset0:2 offset1:3
; VI-NO-DS128-NEXT: v_mov_b32_e32 v19, s0
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1)
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v3
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v4
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v8
; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v16, v8, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v9
; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:8 offset1:9
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v4, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v14, v8, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v8, v9, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[4:5], v[14:15] offset0:10 offset1:11
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v6
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[8:9], v[14:15] offset0:10 offset1:11
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v10
+; VI-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v14, v10, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, v7
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[8:9] offset0:12 offset1:13
+; VI-NO-DS128-NEXT: v_bfe_i32 v8, v18, 0, 16
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v11
+; VI-NO-DS128-NEXT: v_mov_b32_e32 v18, v11
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v18, v3
+; VI-NO-DS128-NEXT: v_bfe_i32 v16, v18, 0, 16
+; VI-NO-DS128-NEXT: v_mov_b32_e32 v20, v3
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v12, v11, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; VI-NO-DS128-NEXT: v_bfe_i32 v14, v20, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v12, v7, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[8:9] offset0:6 offset1:7
; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[10:11], v[6:7] offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[4:5] offset1:1
; VI-NO-DS128-NEXT: s_endpgm
;
; GFX9-NO-DS128-LABEL: local_sextload_v16i16_to_v16i64:
@@ -6936,60 +6950,62 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1
; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1
-; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_read2_b64 v[8:11], v4 offset0:2 offset1:3
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v19, s0
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v3
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v4
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v8
; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v8, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v9
; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:8 offset1:9
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v4, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v8, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v9, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[4:5], v[14:15] offset0:10 offset1:11
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v6
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[8:9], v[14:15] offset0:10 offset1:11
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v10
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v10, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, v7
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[8:9] offset0:12 offset1:13
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v18, 0, 16
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v11
+; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v18, v11
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v18, v3
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v18, 0, 16
+; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v20, v3
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v11, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v20, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v7, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[8:9] offset0:6 offset1:7
; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[10:11], v[6:7] offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[4:5] offset1:1
; GFX9-NO-DS128-NEXT: s_endpgm
;
; EG-LABEL: local_sextload_v16i16_to_v16i64:
@@ -7171,16 +7187,16 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_mov_b32_e32 v0, s1
; VI-DS128-NEXT: ds_read_b128 v[3:6], v0
; VI-DS128-NEXT: ds_read_b128 v[7:10], v0 offset:16
+; VI-DS128-NEXT: v_mov_b32_e32 v18, s0
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
-; VI-DS128-NEXT: v_mov_b32_e32 v18, v6
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_bfe_i32 v11, v8, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; VI-DS128-NEXT: v_bfe_i32 v13, v8, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; VI-DS128-NEXT: v_mov_b32_e32 v8, s0
-; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:80
+; VI-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:80
; VI-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
@@ -7188,24 +7204,25 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10
-; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64
+; VI-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:64
; VI-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; VI-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:112
+; VI-DS128-NEXT: ; implicit-def: $vgpr14_vgpr15
; VI-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9
-; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112
; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6
+; VI-DS128-NEXT: v_mov_b32_e32 v8, v6
; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; VI-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5
-; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96
-; VI-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
+; VI-DS128-NEXT: ds_write_b128 v18, v[14:17] offset:96
+; VI-DS128-NEXT: v_bfe_i32 v14, v8, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
@@ -7220,10 +7237,11 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48
-; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32
-; VI-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16
-; VI-DS128-NEXT: ds_write_b128 v8, v[0:3]
+; VI-DS128-NEXT: ; implicit-def: $vgpr8_vgpr9
+; VI-DS128-NEXT: ds_write_b128 v18, v[14:17] offset:48
+; VI-DS128-NEXT: ds_write_b128 v18, v[4:7] offset:32
+; VI-DS128-NEXT: ds_write_b128 v18, v[10:13] offset:16
+; VI-DS128-NEXT: ds_write_b128 v18, v[0:3]
; VI-DS128-NEXT: s_endpgm
;
; GFX9-DS128-LABEL: local_sextload_v16i16_to_v16i64:
@@ -7233,6 +7251,7 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1
; GFX9-DS128-NEXT: ds_read_b128 v[3:6], v0
; GFX9-DS128-NEXT: ds_read_b128 v[7:10], v0 offset:16
+; GFX9-DS128-NEXT: v_mov_b32_e32 v18, s0
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
@@ -7242,8 +7261,7 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_bfe_i32 v13, v3, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:80
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:80
; GFX9-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
@@ -7251,24 +7269,25 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:64
; GFX9-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[11:14] offset:112
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr14_vgpr15
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112
; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v18, v6
+; GFX9-DS128-NEXT: v_mov_b32_e32 v8, v6
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GFX9-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96
-; GFX9-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[14:17] offset:96
+; GFX9-DS128-NEXT: v_bfe_i32 v14, v8, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16
@@ -7282,10 +7301,11 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3]
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[14:17] offset:48
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[4:7] offset:32
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[10:13] offset:16
+; GFX9-DS128-NEXT: ds_write_b128 v18, v[0:3]
; GFX9-DS128-NEXT: s_endpgm
%load = load <16 x i16>, ptr addrspace(3) %in
%ext = sext <16 x i16> %load to <16 x i64>
@@ -8092,7 +8112,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; SI-NEXT: ds_write2_b64 v7, v[5:6], v[15:16] offset0:12 offset1:13
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
+; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v10
; SI-NEXT: v_bfe_i32 v3, v4, 0, 16
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v8
@@ -8109,7 +8129,11 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v0
; SI-NEXT: v_bfe_i32 v9, v0, 0, 16
; SI-NEXT: v_bfe_i32 v10, v2, 0, 16
-; SI-NEXT: v_bfe_i32 v12, v11, 0, 16
+; SI-NEXT: ; implicit-def: $vgpr11_vgpr12
+; SI-NEXT: ; implicit-def: $vgpr11_vgpr12
+; SI-NEXT: ; implicit-def: $vgpr11_vgpr12
+; SI-NEXT: ; implicit-def: $vgpr11_vgpr12
+; SI-NEXT: v_bfe_i32 v12, v13, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; SI-NEXT: ds_write2_b64 v7, v[10:11], v[12:13] offset0:4 offset1:5
@@ -8139,6 +8163,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-NO-DS128: ; %bb.0:
; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NO-DS128-NEXT: s_mov_b32 m0, -1
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr23_vgpr24
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, s1
; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v7 offset0:6 offset1:7
@@ -8242,6 +8267,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr21_vgpr22
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr21_vgpr22
+; VI-NO-DS128-NEXT: ; implicit-def: $vgpr21_vgpr22
; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[19:20] offset0:6 offset1:7
; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[8:9], v[15:16] offset0:4 offset1:5
; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[6:7], v[13:14] offset0:2 offset1:3
@@ -8251,6 +8279,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-NO-DS128-LABEL: local_sextload_v32i16_to_v32i64:
; GFX9-NO-DS128: ; %bb.0:
; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr22_vgpr23
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s1
; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v8 offset0:6 offset1:7
@@ -8354,6 +8383,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr20_vgpr21
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr20_vgpr21
+; GFX9-NO-DS128-NEXT: ; implicit-def: $vgpr20_vgpr21
; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:6 offset1:7
; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[10:11], v[12:13] offset0:4 offset1:5
; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[8:9], v[1:2] offset0:2 offset1:3
@@ -8788,13 +8820,14 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v18
; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:64
; VI-DS128-NEXT: v_bfe_i32 v9, v4, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; VI-DS128-NEXT: v_bfe_i32 v11, v4, 0, 16
+; VI-DS128-NEXT: ; implicit-def: $vgpr4_vgpr5
; VI-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v18
; VI-DS128-NEXT: v_bfe_i32 v15, v5, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v11, v4, 0, 16
; VI-DS128-NEXT: v_mov_b32_e32 v4, v7
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
@@ -8814,6 +8847,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; VI-DS128-NEXT: ; implicit-def: $vgpr17_vgpr18
+; VI-DS128-NEXT: ; implicit-def: $vgpr17_vgpr18
+; VI-DS128-NEXT: ; implicit-def: $vgpr17_vgpr18
; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32
; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:48
; VI-DS128-NEXT: ds_write_b128 v8, v[9:12]
@@ -8899,17 +8935,18 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v18
; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:112
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v19
; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:64
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
; GFX9-DS128-NEXT: v_bfe_i32 v13, v19, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v15, v1, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16
; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
@@ -8929,6 +8966,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX9-DS128-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-DS128-NEXT: ds_write_b128 v12, v[17:20] offset:32
; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:48
; GFX9-DS128-NEXT: ds_write_b128 v12, v[6:9]
diff --git a/llvm/test/CodeGen/AMDGPU/lround.ll b/llvm/test/CodeGen/AMDGPU/lround.ll
index d8d8308f6cd8a..6c65fbad7a067 100644
--- a/llvm/test/CodeGen/AMDGPU/lround.ll
+++ b/llvm/test/CodeGen/AMDGPU/lround.ll
@@ -822,14 +822,16 @@ define half @intrinsic_fround_half(half %arg) {
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_trunc_f16_e32 v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_sub_f16_e32 v1.l, v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, |v1.l|, 0.5
-; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0, 0x3c00, s0
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.l, 0, 0x3c00, s0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v1.h, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -943,14 +945,16 @@ define i32 @intrinsic_lround_i32_f16(half %arg) {
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_trunc_f16_e32 v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_sub_f16_e32 v1.l, v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, |v1.l|, 0.5
-; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0, 0x3c00, s0
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v0.l, 0, 0x3c00, s0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v2
; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v1.h, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index 1ae3434db6da5..dd31246dd3f1e 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -137,13 +137,23 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %s
}
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
-; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-TRUE16-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
+; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; SDAG-GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX11-TRUE16-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v4
+; SDAG-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-FAKE16-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX9: ; %bb.0:
@@ -172,6 +182,14 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src
; SDAG-CI-NEXT: v_mov_b32_e32 v0, v3
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index eab92668c536b..6c162a55f59eb 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -411,9 +411,10 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr3
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.h, v3.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.h, v4.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
@@ -534,10 +535,11 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr6
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v7.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v6
@@ -704,11 +706,13 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v8, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v9, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr6
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr7
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.h, v7.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.h, v8.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.h, v9.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
@@ -1452,9 +1456,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr3
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v4.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -1617,9 +1624,12 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: ; implicit-def: $vgpr3
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v4.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
index e6960a3f710da..47dd7638f2588 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
@@ -623,6 +623,7 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; GFX9-NEXT: v_mov_b32_e32 v4, v1
; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v0
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[2:3]
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_mov_b32_e32 v2, v1
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v1, v2
@@ -634,20 +635,24 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
-; GFX1100-NEXT: v_ashrrev_i32_e32 v5, 31, v5
-; GFX1100-NEXT: v_mov_b32_e32 v3, v1
+; GFX1100-NEXT: v_ashrrev_i32_e32 v3, 31, v5
+; GFX1100-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX1100-NEXT: v_mov_b32_e32 v5, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4]
+; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v3, v4, v[5:6]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_i64_i32_extops_i32_i64:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
-; GFX1150-NEXT: v_ashrrev_i32_e32 v2, 31, v5
-; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v2, v4, v[1:2]
+; GFX1150-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1150-NEXT: v_ashrrev_i32_e32 v5, 31, v5
+; GFX1150-NEXT: v_mov_b32_e32 v2, v1
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[2:3]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_i64_i32_extops_i32_i64:
@@ -658,10 +663,13 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v4, v[2:3]
-; GFX12-NEXT: v_ashrrev_i32_e32 v2, 31, v5
-; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v2, v4, v[1:2]
+; GFX12-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX12-NEXT: v_ashrrev_i32_e32 v5, 31, v5
+; GFX12-NEXT: v_mov_b32_e32 v2, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v5, v4, v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: mad_i64_i32_extops_i32_i64:
@@ -767,6 +775,7 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 1, v1
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5]
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: v_mov_b32_e32 v4, v1
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v2, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v1, v2
@@ -777,11 +786,13 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0
; GFX1100-NEXT: v_mov_b32_e32 v6, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5]
-; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_and_b32 v5, 1, v6
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5]
+; GFX1100-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1100-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mov_b32_e32 v4, v1
+; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v6, v3, v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_bitops_lhs_mask_small:
@@ -789,9 +800,12 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_mov_b32_e32 v3, v1
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5]
-; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1150-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v3, v2, v[1:2]
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1150-NEXT: v_and_b32_e32 v5, 1, v3
+; GFX1150-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX1150-NEXT: v_mov_b32_e32 v3, v1
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v5, v2, v[3:4]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_bitops_lhs_mask_small:
@@ -803,9 +817,12 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v3, v1
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_and_b32_e32 v5, 1, v3
+; GFX12-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX12-NEXT: v_mov_b32_e32 v3, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v5, v2, v[3:4]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: mad_u64_u32_bitops_lhs_mask_small:
@@ -852,10 +869,11 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v6, v0
+; GFX9-NEXT: v_and_b32_e32 v7, 1, v3
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, v[4:5]
-; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[2:3]
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v7, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -863,21 +881,26 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mov_b32_e32 v6, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
-; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v4, 1, v3
+; GFX1100-NEXT: v_and_b32_e32 v5, 1, v3
+; GFX1100-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX1100-NEXT: v_mov_b32_e32 v3, v1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4]
+; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v6, v5, v[3:4]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_mov_b32_e32 v6, v0
-; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
-; GFX1150-NEXT: v_and_b32_e32 v2, 1, v3
-; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v6, v2, v[1:2]
+; GFX1150-NEXT: v_and_b32_e32 v4, 1, v3
+; GFX1150-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1150-NEXT: v_mov_b32_e32 v2, v1
+; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[2:3]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: mad_u64_u32_bitops_rhs_mask_small:
@@ -888,10 +911,13 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v6, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v6, v2, v[4:5]
-; GFX12-NEXT: v_and_b32_e32 v2, 1, v3
-; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v2, v[1:2]
+; GFX12-NEXT: v_and_b32_e32 v4, 1, v3
+; GFX12-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX12-NEXT: v_mov_b32_e32 v2, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v4, v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: mad_u64_u32_bitops_rhs_mask_small:
@@ -1795,6 +1821,7 @@ define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v0, 0
+; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[6:7]
; GFX9-NEXT: v_mov_b32_e32 v5, 0
@@ -1806,20 +1833,22 @@ define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 {
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
+; GFX1100-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
-; GFX1100-NEXT: v_mad_u64_u32 v[5:6], null, v2, v0, v[1:2]
+; GFX1100-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_mad_u64_u32 v[7:8], null, v2, v0, v[5:6]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v5, v[3:4]
+; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v7, v[3:4]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: lshr_mad_i64_4:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
+; GFX1150-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1150-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
-; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, v[1:2]
+; GFX1150-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, 0
+; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, v[5:6]
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
@@ -1832,9 +1861,10 @@ define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, 0
+; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v4, 0
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, v0, v[1:2]
+; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, 0
+; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, v0, v[5:6]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4]
; GFX12-NEXT: s_setpc_b64 s[30:31]
@@ -2116,9 +2146,10 @@ define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v0, v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v0, v3
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v1, v[0:1]
+; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v1, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -2126,22 +2157,22 @@ define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
+; GFX1100-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mov_b32_e32 v0, v3
-; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v1, v[0:1]
+; GFX1100-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v0, v2
+; GFX1100-NEXT: v_mad_u64_u32 v[3:4], null, v1, v1, v[5:6]
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
+; GFX1100-NEXT: v_mov_b32_e32 v1, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; GFX1150-LABEL: lshr_mad_i64_negative_4:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1150-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, v[0:1]
+; GFX1150-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1150-NEXT: v_mov_b32_e32 v0, v4
-; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v1, v1, v[0:1]
-; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1150-NEXT: v_mov_b32_e32 v0, v3
+; GFX1150-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v0, v3
+; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v1, v1, v[5:6]
; GFX1150-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: lshr_mad_i64_negative_4:
@@ -2152,11 +2183,10 @@ define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v1, v0, v[0:1]
+; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_mov_b32_e32 v0, v4
-; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v1, v1, v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX12-NEXT: v_mov_b32_e32 v0, v3
+; GFX12-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v0, v3
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v1, v1, v[5:6]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: lshr_mad_i64_negative_4:
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index 9f27e1ffd9130..a086503dd7664 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -8,6 +8,7 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-GISEL %s
define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_vv:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -51,6 +52,7 @@ define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
+;
; GFX900-LABEL: fadd_v2_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -96,6 +98,7 @@ define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
}
define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
+;
; GFX900-LABEL: fadd_v4_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
@@ -181,6 +184,7 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
}
define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
+;
; GFX900-LABEL: fadd_v32_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -485,6 +489,7 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; FIXME: GISel does not use op_sel for splat constants.
define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_imm:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -559,6 +564,7 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_v_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -631,6 +637,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_lit_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -703,6 +710,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_lit_hi0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -761,6 +769,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_lit_lo0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -821,6 +830,7 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fadd_v2_v_unfoldable_lit:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -882,6 +892,7 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
; FIXME: Fold fneg into v_pk_add_f32 with Global ISel.
define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
+;
; GFX900-LABEL: fadd_v2_v_fneg:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -927,10 +938,12 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX1250-SDAG-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
+; GFX1250-SDAG-NEXT: s_mov_b32 s4, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]
+; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[4:5] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] scale_offset
; GFX1250-SDAG-NEXT: s_endpgm
;
@@ -959,6 +972,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
}
define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
+;
; GFX900-LABEL: fadd_v2_v_fneg_lo:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1004,10 +1018,12 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX1250-SDAG-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
+; GFX1250-SDAG-NEXT: s_mov_b32 s4, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] op_sel_hi:[1,0] neg_lo:[0,1]
+; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[4:5] op_sel_hi:[1,0] neg_lo:[0,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] scale_offset
; GFX1250-SDAG-NEXT: s_endpgm
;
@@ -1036,6 +1052,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
}
define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
+;
; GFX900-LABEL: fadd_v2_v_fneg_hi:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1081,10 +1098,12 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX1250-SDAG-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
+; GFX1250-SDAG-NEXT: s_mov_b32 s4, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] op_sel_hi:[1,0] neg_hi:[0,1]
+; GFX1250-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[4:5] op_sel_hi:[1,0] neg_hi:[0,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] scale_offset
; GFX1250-SDAG-NEXT: s_endpgm
;
@@ -1113,6 +1132,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
}
define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, float %y) {
+;
; GFX900-LABEL: fadd_v2_v_fneg_lo2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -1189,6 +1209,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo
}
define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, float %y) {
+;
; GFX900-LABEL: fadd_v2_v_fneg_hi2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -1265,6 +1286,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo
}
define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fmul_v2_vv:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1308,6 +1330,7 @@ define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
+;
; GFX900-LABEL: fmul_v2_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -1353,6 +1376,7 @@ define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
}
define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
+;
; GFX900-LABEL: fmul_v4_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
@@ -1438,6 +1462,7 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
}
define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
+;
; GFX900-LABEL: fmul_v32_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1741,6 +1766,7 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
}
define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fmul_v2_v_imm:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1815,6 +1841,7 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fmul_v2_v_v_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1887,6 +1914,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fmul_v2_v_lit_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -1959,6 +1987,7 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fmul_v2_v_unfoldable_lit:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2019,6 +2048,7 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
+;
; GFX900-LABEL: fmul_v2_v_fneg:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2064,10 +2094,12 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX1250-SDAG-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
+; GFX1250-SDAG-NEXT: s_mov_b32 s4, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[2:3] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]
+; GFX1250-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[4:5] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] scale_offset
; GFX1250-SDAG-NEXT: s_endpgm
;
@@ -2096,6 +2128,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
}
define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fma_v2_vv:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2139,6 +2172,7 @@ define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
+;
; GFX900-LABEL: fma_v2_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -2184,6 +2218,7 @@ define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
}
define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
+;
; GFX900-LABEL: fma_v4_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
@@ -2269,6 +2304,7 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
}
define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
+;
; GFX900-LABEL: fma_v32_vs:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2571,6 +2607,7 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
}
define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fma_v2_v_imm:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2673,6 +2710,7 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fma_v2_v_v_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2745,6 +2783,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fma_v2_v_lit_splat:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2840,6 +2879,7 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fma_v2_v_unfoldable_lit:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2940,6 +2980,7 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
+;
; GFX900-LABEL: fma_v2_v_fneg:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -2985,10 +3026,12 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v0
+; GFX1250-SDAG-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v2, s[0:1] scale_offset
+; GFX1250-SDAG-NEXT: s_mov_b32 s4, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], s[2:3] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX1250-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[4:5], s[4:5] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX1250-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] scale_offset
; GFX1250-SDAG-NEXT: s_endpgm
;
@@ -3017,6 +3060,7 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
}
define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
+;
; GFX900-LABEL: add_vector_neg_bitcast_scalar_lo:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -3104,6 +3148,7 @@ bb:
}
define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
+;
; GFX900-LABEL: fma_vector_vector_neg_scalar_lo_scalar_hi:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -3206,6 +3251,7 @@ bb:
}
define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+;
; GFX900-LABEL: shuffle_add_f32:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dword s0, s[4:5], 0x2c
@@ -3284,6 +3330,7 @@ bb:
}
define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+;
; GFX900-LABEL: shuffle_neg_add_f32:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dword s0, s[4:5], 0x2c
@@ -3382,6 +3429,7 @@ bb:
}
define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
+;
; GFX900-LABEL: fadd_fadd_fsub_0:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -3405,9 +3453,10 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX90A-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
-; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1
+; GFX90A-GISEL-NEXT: v_pk_add_f32 v[2:3], s[0:1], 0
+; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v0
@@ -3417,10 +3466,11 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX942-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX942-GISEL: ; %bb.0: ; %bb
; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
+; GFX942-GISEL-NEXT: v_pk_add_f32 v[2:3], s[0:1], 0
; GFX942-GISEL-NEXT: s_nop 0
-; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v0
@@ -3441,15 +3491,15 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX1250-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
-; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
-; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, v1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
-; GFX1250-GISEL-NEXT: v_mov_b32_e32 v3, v0
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[2:3], 0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, v0
; GFX1250-GISEL-NEXT: flat_store_b64 v[0:1], v[2:3]
; GFX1250-GISEL-NEXT: s_endpgm
bb:
@@ -3463,6 +3513,7 @@ bb:
}
define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, ptr addrspace(1) %ptr) {
+;
; GFX900-LABEL: fadd_fadd_fsub:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -3496,35 +3547,37 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX90A-GISEL-LABEL: fadd_fadd_fsub:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX90A-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
-; GFX90A-GISEL-NEXT: v_sub_f32_e32 v0, s0, v2
-; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-GISEL-NEXT: v_pk_add_f32 v[2:3], s[2:3], v[2:3]
-; GFX90A-GISEL-NEXT: v_subrev_f32_e32 v1, s3, v2
-; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX90A-GISEL-NEXT: v_pk_add_f32 v[2:3], s[0:1], v[2:3]
+; GFX90A-GISEL-NEXT: v_sub_f32_e32 v2, s0, v0
+; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[2:3], v[0:1]
+; GFX90A-GISEL-NEXT: v_subrev_f32_e32 v3, s3, v0
+; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-GISEL-NEXT: global_store_dwordx2 v0, v[2:3], s[6:7]
; GFX90A-GISEL-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: fadd_fadd_fsub:
; GFX942-GISEL: ; %bb.0: ; %bb
; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX942-GISEL-NEXT: v_pk_add_f32 v[2:3], s[0:1], v[2:3]
; GFX942-GISEL-NEXT: s_nop 0
-; GFX942-GISEL-NEXT: v_sub_f32_e32 v0, s0, v2
-; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-GISEL-NEXT: v_pk_add_f32 v[2:3], s[2:3], v[2:3]
+; GFX942-GISEL-NEXT: v_sub_f32_e32 v2, s0, v0
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[2:3], v[0:1]
; GFX942-GISEL-NEXT: s_nop 0
-; GFX942-GISEL-NEXT: v_subrev_f32_e32 v1, s3, v2
-; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX942-GISEL-NEXT: v_subrev_f32_e32 v3, s3, v0
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-NEXT: global_store_dwordx2 v0, v[2:3], s[6:7]
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX1250-SDAG-LABEL: fadd_fadd_fsub:
@@ -3549,15 +3602,16 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX1250-GISEL-NEXT: s_sub_f32 s0, s0, s2
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[2:3]
-; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, v1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[2:3], v[0:1]
+; GFX1250-GISEL-NEXT: v_pk_add_f32 v[0:1], v[2:3], v[4:5]
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_subrev_f32 v3, s3, v0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-GISEL-NEXT: global_store_b64 v0, v[2:3], s[4:5]
@@ -3573,6 +3627,7 @@ bb:
}
define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
+;
; GFX900-LABEL: fadd_shuffle_v4:
; GFX900: ; %bb.0: ; %bb
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -3651,6 +3706,7 @@ bb:
}
define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
+;
; GFX900-LABEL: fneg_v2f32_vec:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -3717,6 +3773,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
}
define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x) {
+;
; GFX900-LABEL: fneg_v2f32_scalar:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 25609e881254e..0a8aa31dd7548 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -3357,7 +3357,9 @@ define i64 @v_mul_9_add_52_i64(i64 %arg) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
-; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 9, v[1:2]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 9, v[3:4]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_mul_9_add_52_i64:
@@ -3365,6 +3367,7 @@ define i64 @v_mul_9_add_52_i64(i64 %arg) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 9, 52
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 9, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
@@ -3375,7 +3378,9 @@ define i64 @v_mul_9_add_52_i64(i64 %arg) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2]
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_mov_b32_e32 v3, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[3:4]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: v_mul_9_add_52_i64:
@@ -3694,7 +3699,9 @@ define i64 @v_mul_5_add_1_i64(i64 %arg) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
-; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 5, v[1:2]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 5, v[3:4]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_mul_5_add_1_i64:
@@ -3702,6 +3709,7 @@ define i64 @v_mul_5_add_1_i64(i64 %arg) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, 5, 1
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 5, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
@@ -3712,7 +3720,9 @@ define i64 @v_mul_5_add_1_i64(i64 %arg) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2]
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_mov_b32_e32 v3, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[3:4]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: v_mul_5_add_1_i64:
@@ -3772,7 +3782,9 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[3:4]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_mul_284_add_82_i64:
@@ -3783,6 +3795,7 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
@@ -3794,7 +3807,9 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX10-NEXT: s_movk_i32 s4, 0x11c
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[1:2]
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_mov_b32_e32 v3, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[3:4]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: v_mul_284_add_82_i64:
@@ -3855,7 +3870,9 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[3:4]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_mul_934584645_add_8234599_i64:
@@ -3866,6 +3883,7 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
; GFX90A-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
@@ -3877,7 +3895,9 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX10-NEXT: s_mov_b32 s4, 0x37b4a145
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x7da667
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[1:2]
+; GFX10-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-NEXT: v_mov_b32_e32 v3, v1
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[3:4]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: v_mul_934584645_add_8234599_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 5f42abbeae253..9f0c130688b3c 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -499,9 +499,10 @@ define void @test_rewrite_mfma_subreg_insert0(float %arg0, float %arg1, ptr addr
; CHECK-LABEL: test_rewrite_mfma_subreg_insert0:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: global_load_dwordx4 a[0:3], v[2:3], off
+; CHECK-NEXT: global_load_dwordx4 a[8:11], v[2:3], off
+; CHECK-NEXT: ; implicit-def: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[0:3]
+; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[8:11]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use a[0:7]
; CHECK-NEXT: ;;#ASMEND
@@ -519,6 +520,7 @@ define void @test_rewrite_mfma_subreg_insert1(float %arg0, float %arg1, ptr addr
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
+; CHECK-NEXT: ; implicit-def: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
; CHECK-NEXT: s_nop 3
@@ -544,7 +546,11 @@ define void @test_rewrite_mfma_subreg_insert2(double %arg0, double %arg1, ptr ad
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 a[0:1], v[4:5], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], v[2:3], a[0:1]
+; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 a[4:5], v[0:1], v[2:3], a[0:1]
+; CHECK-NEXT: ; implicit-def: $agpr0_agpr1_agpr2_agpr3
+; CHECK-NEXT: s_nop 5
+; CHECK-NEXT: v_accvgpr_mov_b32 a0, a4
+; CHECK-NEXT: v_accvgpr_mov_b32 a1, a5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use a[0:3]
; CHECK-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 7a3bff8aed56e..256e4bf5f65f0 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -2640,90 +2640,90 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX9-FLATSCR-LABEL: cs_main:
; GFX9-FLATSCR: ; %bb.0:
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, v22
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v19
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, v6
+; GFX9-FLATSCR-NEXT: scratch_load_dword v14, v2, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v16
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[2:5], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v18
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, v1
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v1
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: cs_main:
@@ -2822,93 +2822,93 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, v22
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v19
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v6
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v14, v2, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v16
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[2:5], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v18
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, v1
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v1
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v14, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: cs_main:
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll
index 584d26ed41893..35ca9961c8aad 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll
@@ -8,6 +8,8 @@ define amdgpu_kernel void @v_sext_in_reg_i8_i16_shuffle_vector(ptr addrspace(1)
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b64 v[1:2], v0, s[2:3]
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll
index 9b3dc7f531021..c2bb7806498cf 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll
@@ -57,37 +57,41 @@ define void @v_shuffle_v2f32_v2f32__0_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2f32_v2f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v2f32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f32_v2f32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -110,37 +114,41 @@ define void @v_shuffle_v2f32_v2f32__2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2f32_v2f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v2f32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f32_v2f32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -350,33 +358,37 @@ define void @v_shuffle_v2f32_v2f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f32_v2f32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2f32_v2f32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -476,33 +488,37 @@ define void @v_shuffle_v2f32_v2f32__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f32_v2f32__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2f32_v2f32__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -723,37 +739,41 @@ define void @v_shuffle_v2f32_v2f32__0_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2f32_v2f32__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v2f32__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f32_v2f32__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -1016,6 +1036,7 @@ define void @s_shuffle_v2f32_v2f32__0_u() {
define void @s_shuffle_v2f32_v2f32__1_u() {
; GFX900-LABEL: s_shuffle_v2f32_v2f32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1028,6 +1049,7 @@ define void @s_shuffle_v2f32_v2f32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v2f32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1040,6 +1062,7 @@ define void @s_shuffle_v2f32_v2f32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1072,6 +1095,7 @@ define void @s_shuffle_v2f32_v2f32__2_u() {
define void @s_shuffle_v2f32_v2f32__3_u() {
; GFX900-LABEL: s_shuffle_v2f32_v2f32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1084,6 +1108,7 @@ define void @s_shuffle_v2f32_v2f32__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v2f32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1096,6 +1121,7 @@ define void @s_shuffle_v2f32_v2f32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v2f32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1287,6 +1313,7 @@ define void @s_shuffle_v2f32_v2f32__3_3() {
define void @s_shuffle_v2f32_v2f32__u_0() {
; GFX900-LABEL: s_shuffle_v2f32_v2f32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1299,6 +1326,7 @@ define void @s_shuffle_v2f32_v2f32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v2f32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1311,6 +1339,7 @@ define void @s_shuffle_v2f32_v2f32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v2f32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1392,6 +1421,7 @@ define void @s_shuffle_v2f32_v2f32__1_0() {
define void @s_shuffle_v2f32_v2f32__2_0() {
; GFX900-LABEL: s_shuffle_v2f32_v2f32__2_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1404,6 +1434,7 @@ define void @s_shuffle_v2f32_v2f32__2_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v2f32__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1416,6 +1447,7 @@ define void @s_shuffle_v2f32_v2f32__2_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v2f32__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1626,6 +1658,7 @@ define void @s_shuffle_v2f32_v2f32__0_2() {
define void @s_shuffle_v2f32_v2f32__1_2() {
; GFX900-LABEL: s_shuffle_v2f32_v2f32__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1638,6 +1671,7 @@ define void @s_shuffle_v2f32_v2f32__1_2() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v2f32__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1650,6 +1684,7 @@ define void @s_shuffle_v2f32_v2f32__1_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll
index 34043cd067b25..0053e66c94b9c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2f32_v3f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2f32_v3f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -102,6 +104,7 @@ define void @v_shuffle_v2f32_v3f32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -114,6 +117,7 @@ define void @v_shuffle_v2f32_v3f32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -126,6 +130,7 @@ define void @v_shuffle_v2f32_v3f32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -166,9 +171,10 @@ define void @v_shuffle_v2f32_v3f32__4_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -178,9 +184,10 @@ define void @v_shuffle_v2f32_v3f32__4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -197,6 +204,7 @@ define void @v_shuffle_v2f32_v3f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -209,6 +217,7 @@ define void @v_shuffle_v2f32_v3f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -221,6 +230,7 @@ define void @v_shuffle_v2f32_v3f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -536,9 +546,10 @@ define void @v_shuffle_v2f32_v3f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,9 +559,10 @@ define void @v_shuffle_v2f32_v3f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -560,9 +572,10 @@ define void @v_shuffle_v2f32_v3f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -706,9 +719,10 @@ define void @v_shuffle_v2f32_v3f32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -718,9 +732,10 @@ define void @v_shuffle_v2f32_v3f32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -730,9 +745,10 @@ define void @v_shuffle_v2f32_v3f32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1066,6 +1082,7 @@ define void @v_shuffle_v2f32_v3f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1078,6 +1095,7 @@ define void @v_shuffle_v2f32_v3f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1236,6 +1254,7 @@ define void @v_shuffle_v2f32_v3f32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1248,6 +1267,7 @@ define void @v_shuffle_v2f32_v3f32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1381,9 +1401,10 @@ define void @v_shuffle_v2f32_v3f32__1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1393,9 +1414,10 @@ define void @v_shuffle_v2f32_v3f32__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1411,6 +1433,7 @@ define void @v_shuffle_v2f32_v3f32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1423,6 +1446,7 @@ define void @v_shuffle_v2f32_v3f32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1435,6 +1459,7 @@ define void @v_shuffle_v2f32_v3f32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1800,6 +1825,7 @@ define void @v_shuffle_v2f32_v3f32__u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1812,6 +1838,7 @@ define void @v_shuffle_v2f32_v3f32__u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -2129,6 +2156,7 @@ define void @s_shuffle_v2f32_v3f32__0_u() {
define void @s_shuffle_v2f32_v3f32__1_u() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2141,6 +2169,7 @@ define void @s_shuffle_v2f32_v3f32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2153,6 +2182,7 @@ define void @s_shuffle_v2f32_v3f32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2171,6 +2201,7 @@ define void @s_shuffle_v2f32_v3f32__1_u() {
define void @s_shuffle_v2f32_v3f32__2_u() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2183,6 +2214,7 @@ define void @s_shuffle_v2f32_v3f32__2_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2195,6 +2227,7 @@ define void @s_shuffle_v2f32_v3f32__2_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2227,6 +2260,7 @@ define void @s_shuffle_v2f32_v3f32__3_u() {
define void @s_shuffle_v2f32_v3f32__4_u() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2239,6 +2273,7 @@ define void @s_shuffle_v2f32_v3f32__4_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2251,6 +2286,7 @@ define void @s_shuffle_v2f32_v3f32__4_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2270,6 +2306,7 @@ define void @s_shuffle_v2f32_v3f32__4_u() {
define void @s_shuffle_v2f32_v3f32__5_u() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2282,6 +2319,7 @@ define void @s_shuffle_v2f32_v3f32__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2294,6 +2332,7 @@ define void @s_shuffle_v2f32_v3f32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2586,6 +2625,7 @@ define void @s_shuffle_v2f32_v3f32__5_5() {
define void @s_shuffle_v2f32_v3f32__u_0() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2598,6 +2638,7 @@ define void @s_shuffle_v2f32_v3f32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2610,6 +2651,7 @@ define void @s_shuffle_v2f32_v3f32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2736,6 +2778,7 @@ define void @s_shuffle_v2f32_v3f32__2_0() {
define void @s_shuffle_v2f32_v3f32__3_0() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__3_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2748,6 +2791,7 @@ define void @s_shuffle_v2f32_v3f32__3_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__3_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2760,6 +2804,7 @@ define void @s_shuffle_v2f32_v3f32__3_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3041,6 +3086,7 @@ define void @s_shuffle_v2f32_v3f32__4_1() {
define void @s_shuffle_v2f32_v3f32__u_2() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3053,6 +3099,7 @@ define void @s_shuffle_v2f32_v3f32__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3065,6 +3112,7 @@ define void @s_shuffle_v2f32_v3f32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3191,6 +3239,7 @@ define void @s_shuffle_v2f32_v3f32__2_2() {
define void @s_shuffle_v2f32_v3f32__3_2() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__3_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3203,6 +3252,7 @@ define void @s_shuffle_v2f32_v3f32__3_2() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__3_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3215,6 +3265,7 @@ define void @s_shuffle_v2f32_v3f32__3_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3342,6 +3393,7 @@ define void @s_shuffle_v2f32_v3f32__0_3() {
define void @s_shuffle_v2f32_v3f32__1_3() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__1_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3354,6 +3406,7 @@ define void @s_shuffle_v2f32_v3f32__1_3() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__1_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3366,6 +3419,7 @@ define void @s_shuffle_v2f32_v3f32__1_3() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3384,6 +3438,7 @@ define void @s_shuffle_v2f32_v3f32__1_3() {
define void @s_shuffle_v2f32_v3f32__2_3() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__2_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3396,6 +3451,7 @@ define void @s_shuffle_v2f32_v3f32__2_3() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__2_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3408,6 +3464,7 @@ define void @s_shuffle_v2f32_v3f32__2_3() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3743,6 +3800,7 @@ define void @s_shuffle_v2f32_v3f32__4_4() {
define void @s_shuffle_v2f32_v3f32__u_5() {
; GFX900-LABEL: s_shuffle_v2f32_v3f32__u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3755,6 +3813,7 @@ define void @s_shuffle_v2f32_v3f32__u_5() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v3f32__u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3767,6 +3826,7 @@ define void @s_shuffle_v2f32_v3f32__u_5() {
;
; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
index 07ca294019341..8c507189f18e1 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2f32_v4f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2f32_v4f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -141,6 +143,7 @@ define void @v_shuffle_v2f32_v4f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -153,6 +156,7 @@ define void @v_shuffle_v2f32_v4f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -165,6 +169,7 @@ define void @v_shuffle_v2f32_v4f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -205,9 +210,10 @@ define void @v_shuffle_v2f32_v4f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -217,9 +223,10 @@ define void @v_shuffle_v2f32_v4f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -276,6 +283,7 @@ define void @v_shuffle_v2f32_v4f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -288,6 +296,7 @@ define void @v_shuffle_v2f32_v4f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -300,6 +309,7 @@ define void @v_shuffle_v2f32_v4f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -707,9 +717,10 @@ define void @v_shuffle_v2f32_v4f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -719,9 +730,10 @@ define void @v_shuffle_v2f32_v4f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -731,9 +743,10 @@ define void @v_shuffle_v2f32_v4f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -920,9 +933,10 @@ define void @v_shuffle_v2f32_v4f32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -932,9 +946,10 @@ define void @v_shuffle_v2f32_v4f32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,9 +959,10 @@ define void @v_shuffle_v2f32_v4f32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1428,6 +1444,7 @@ define void @v_shuffle_v2f32_v4f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1440,6 +1457,7 @@ define void @v_shuffle_v2f32_v4f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -1638,6 +1656,7 @@ define void @v_shuffle_v2f32_v4f32__4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1650,6 +1669,7 @@ define void @v_shuffle_v2f32_v4f32__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2184,9 +2204,10 @@ define void @v_shuffle_v2f32_v4f32__1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2196,9 +2217,10 @@ define void @v_shuffle_v2f32_v4f32__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2253,6 +2275,7 @@ define void @v_shuffle_v2f32_v4f32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2265,6 +2288,7 @@ define void @v_shuffle_v2f32_v4f32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2277,6 +2301,7 @@ define void @v_shuffle_v2f32_v4f32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2781,6 +2806,7 @@ define void @v_shuffle_v2f32_v4f32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2793,6 +2819,7 @@ define void @v_shuffle_v2f32_v4f32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -3582,6 +3609,7 @@ define void @s_shuffle_v2f32_v4f32__0_u() {
define void @s_shuffle_v2f32_v4f32__1_u() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3594,6 +3622,7 @@ define void @s_shuffle_v2f32_v4f32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3606,6 +3635,7 @@ define void @s_shuffle_v2f32_v4f32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3666,6 +3696,7 @@ define void @s_shuffle_v2f32_v4f32__2_u() {
define void @s_shuffle_v2f32_v4f32__3_u() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3678,6 +3709,7 @@ define void @s_shuffle_v2f32_v4f32__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3690,6 +3722,7 @@ define void @s_shuffle_v2f32_v4f32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3722,6 +3755,7 @@ define void @s_shuffle_v2f32_v4f32__4_u() {
define void @s_shuffle_v2f32_v4f32__5_u() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3734,6 +3768,7 @@ define void @s_shuffle_v2f32_v4f32__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3746,6 +3781,7 @@ define void @s_shuffle_v2f32_v4f32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3808,6 +3844,7 @@ define void @s_shuffle_v2f32_v4f32__6_u() {
define void @s_shuffle_v2f32_v4f32__7_u() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3820,6 +3857,7 @@ define void @s_shuffle_v2f32_v4f32__7_u() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3832,6 +3870,7 @@ define void @s_shuffle_v2f32_v4f32__7_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4225,6 +4264,7 @@ define void @s_shuffle_v2f32_v4f32__7_7() {
define void @s_shuffle_v2f32_v4f32__u_0() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4237,6 +4277,7 @@ define void @s_shuffle_v2f32_v4f32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4249,6 +4290,7 @@ define void @s_shuffle_v2f32_v4f32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4420,6 +4462,7 @@ define void @s_shuffle_v2f32_v4f32__3_0() {
define void @s_shuffle_v2f32_v4f32__4_0() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__4_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4432,6 +4475,7 @@ define void @s_shuffle_v2f32_v4f32__4_0() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__4_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4444,6 +4488,7 @@ define void @s_shuffle_v2f32_v4f32__4_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4880,6 +4925,7 @@ define void @s_shuffle_v2f32_v4f32__6_1() {
define void @s_shuffle_v2f32_v4f32__u_2() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4892,6 +4938,7 @@ define void @s_shuffle_v2f32_v4f32__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4904,6 +4951,7 @@ define void @s_shuffle_v2f32_v4f32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5075,6 +5123,7 @@ define void @s_shuffle_v2f32_v4f32__3_2() {
define void @s_shuffle_v2f32_v4f32__4_2() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__4_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5087,6 +5136,7 @@ define void @s_shuffle_v2f32_v4f32__4_2() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__4_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5099,6 +5149,7 @@ define void @s_shuffle_v2f32_v4f32__4_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5625,6 +5676,7 @@ define void @s_shuffle_v2f32_v4f32__0_4() {
define void @s_shuffle_v2f32_v4f32__1_4() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__1_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5637,6 +5689,7 @@ define void @s_shuffle_v2f32_v4f32__1_4() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__1_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5649,6 +5702,7 @@ define void @s_shuffle_v2f32_v4f32__1_4() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5709,6 +5763,7 @@ define void @s_shuffle_v2f32_v4f32__2_4() {
define void @s_shuffle_v2f32_v4f32__3_4() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__3_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5721,6 +5776,7 @@ define void @s_shuffle_v2f32_v4f32__3_4() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__3_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5733,6 +5789,7 @@ define void @s_shuffle_v2f32_v4f32__3_4() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -6215,6 +6272,7 @@ define void @s_shuffle_v2f32_v4f32__6_5() {
define void @s_shuffle_v2f32_v4f32__u_6() {
; GFX900-LABEL: s_shuffle_v2f32_v4f32__u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -6227,6 +6285,7 @@ define void @s_shuffle_v2f32_v4f32__u_6() {
;
; GFX90A-LABEL: s_shuffle_v2f32_v4f32__u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -6239,6 +6298,7 @@ define void @s_shuffle_v2f32_v4f32__u_6() {
;
; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
index 3deb23ca5314b..f8492370a55bf 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2f32_v8f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2f32_v8f32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x float> asm "; def $0", "=v"()
@@ -152,6 +154,7 @@ define void @v_shuffle_v2f32_v8f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -164,6 +167,7 @@ define void @v_shuffle_v2f32_v8f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -232,6 +236,7 @@ define void @v_shuffle_v2f32_v8f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -244,6 +249,7 @@ define void @v_shuffle_v2f32_v8f32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -301,6 +307,7 @@ define void @v_shuffle_v2f32_v8f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -313,6 +320,7 @@ define void @v_shuffle_v2f32_v8f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -325,6 +333,7 @@ define void @v_shuffle_v2f32_v8f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -365,9 +374,10 @@ define void @v_shuffle_v2f32_v8f32__9_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,9 +387,10 @@ define void @v_shuffle_v2f32_v8f32__9_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x float> asm "; def $0", "=v"()
@@ -447,6 +458,7 @@ define void @v_shuffle_v2f32_v8f32__11_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -459,6 +471,7 @@ define void @v_shuffle_v2f32_v8f32__11_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -529,6 +542,7 @@ define void @v_shuffle_v2f32_v8f32__13_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -541,6 +555,7 @@ define void @v_shuffle_v2f32_v8f32__13_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -600,6 +615,7 @@ define void @v_shuffle_v2f32_v8f32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -612,6 +628,7 @@ define void @v_shuffle_v2f32_v8f32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -624,6 +641,7 @@ define void @v_shuffle_v2f32_v8f32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -1419,9 +1437,10 @@ define void @v_shuffle_v2f32_v8f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,9 +1450,10 @@ define void @v_shuffle_v2f32_v8f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1443,9 +1463,10 @@ define void @v_shuffle_v2f32_v8f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x float> asm "; def $0", "=v"()
@@ -1804,9 +1825,10 @@ define void @v_shuffle_v2f32_v8f32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1816,9 +1838,10 @@ define void @v_shuffle_v2f32_v8f32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1828,9 +1851,10 @@ define void @v_shuffle_v2f32_v8f32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x float> asm "; def $0", "=v"()
@@ -2904,6 +2928,7 @@ define void @v_shuffle_v2f32_v8f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -2916,6 +2941,7 @@ define void @v_shuffle_v2f32_v8f32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -3286,6 +3312,7 @@ define void @v_shuffle_v2f32_v8f32__8_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -3298,6 +3325,7 @@ define void @v_shuffle_v2f32_v8f32__8_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4374,6 +4402,7 @@ define void @v_shuffle_v2f32_v8f32__u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4386,6 +4415,7 @@ define void @v_shuffle_v2f32_v8f32__u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4756,6 +4786,7 @@ define void @v_shuffle_v2f32_v8f32__8_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4768,6 +4799,7 @@ define void @v_shuffle_v2f32_v8f32__8_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -5844,6 +5876,7 @@ define void @v_shuffle_v2f32_v8f32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -5856,6 +5889,7 @@ define void @v_shuffle_v2f32_v8f32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -6226,6 +6260,7 @@ define void @v_shuffle_v2f32_v8f32__8_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -6238,6 +6273,7 @@ define void @v_shuffle_v2f32_v8f32__8_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7364,9 +7400,10 @@ define void @v_shuffle_v2f32_v8f32__1_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7376,9 +7413,10 @@ define void @v_shuffle_v2f32_v8f32__1_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x float> asm "; def $0", "=v"()
@@ -7444,6 +7482,7 @@ define void @v_shuffle_v2f32_v8f32__3_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7456,6 +7495,7 @@ define void @v_shuffle_v2f32_v8f32__3_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7524,6 +7564,7 @@ define void @v_shuffle_v2f32_v8f32__5_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7536,6 +7577,7 @@ define void @v_shuffle_v2f32_v8f32__5_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7593,6 +7635,7 @@ define void @v_shuffle_v2f32_v8f32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7605,6 +7648,7 @@ define void @v_shuffle_v2f32_v8f32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7617,6 +7661,7 @@ define void @v_shuffle_v2f32_v8f32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -8681,6 +8726,7 @@ define void @v_shuffle_v2f32_v8f32__u_10(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -8693,6 +8739,7 @@ define void @v_shuffle_v2f32_v8f32__u_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -10213,6 +10260,7 @@ define void @v_shuffle_v2f32_v8f32__u_12(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -10225,6 +10273,7 @@ define void @v_shuffle_v2f32_v8f32__u_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -11745,6 +11794,7 @@ define void @v_shuffle_v2f32_v8f32__u_14(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -11757,6 +11807,7 @@ define void @v_shuffle_v2f32_v8f32__u_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -13322,6 +13373,7 @@ define void @s_shuffle_v2f32_v8f32__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13334,6 +13386,7 @@ define void @s_shuffle_v2f32_v8f32__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13342,6 +13395,7 @@ define void @s_shuffle_v2f32_v8f32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13406,6 +13460,7 @@ define void @s_shuffle_v2f32_v8f32__3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13418,6 +13473,7 @@ define void @s_shuffle_v2f32_v8f32__3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13426,6 +13482,7 @@ define void @s_shuffle_v2f32_v8f32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13486,9 +13543,10 @@ define void @s_shuffle_v2f32_v8f32__5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13498,9 +13556,10 @@ define void @s_shuffle_v2f32_v8f32__5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13508,6 +13567,7 @@ define void @s_shuffle_v2f32_v8f32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13572,6 +13632,7 @@ define void @s_shuffle_v2f32_v8f32__7_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13584,6 +13645,7 @@ define void @s_shuffle_v2f32_v8f32__7_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13592,6 +13654,7 @@ define void @s_shuffle_v2f32_v8f32__7_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13628,6 +13691,7 @@ define void @s_shuffle_v2f32_v8f32__9_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13640,6 +13704,7 @@ define void @s_shuffle_v2f32_v8f32__9_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13648,6 +13713,7 @@ define void @s_shuffle_v2f32_v8f32__9_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13714,6 +13780,7 @@ define void @s_shuffle_v2f32_v8f32__11_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13726,6 +13793,7 @@ define void @s_shuffle_v2f32_v8f32__11_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13734,6 +13802,7 @@ define void @s_shuffle_v2f32_v8f32__11_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13796,9 +13865,10 @@ define void @s_shuffle_v2f32_v8f32__13_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13808,9 +13878,10 @@ define void @s_shuffle_v2f32_v8f32__13_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13818,6 +13889,7 @@ define void @s_shuffle_v2f32_v8f32__13_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13884,6 +13956,7 @@ define void @s_shuffle_v2f32_v8f32__15_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13896,6 +13969,7 @@ define void @s_shuffle_v2f32_v8f32__15_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13904,6 +13978,7 @@ define void @s_shuffle_v2f32_v8f32__15_u() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14679,6 +14754,7 @@ define void @s_shuffle_v2f32_v8f32__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -14691,6 +14767,7 @@ define void @s_shuffle_v2f32_v8f32__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -14699,6 +14776,7 @@ define void @s_shuffle_v2f32_v8f32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15027,6 +15105,7 @@ define void @s_shuffle_v2f32_v8f32__8_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -15039,6 +15118,7 @@ define void @s_shuffle_v2f32_v8f32__8_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -15047,6 +15127,7 @@ define void @s_shuffle_v2f32_v8f32__8_0() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16023,6 +16104,7 @@ define void @s_shuffle_v2f32_v8f32__u_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16035,6 +16117,7 @@ define void @s_shuffle_v2f32_v8f32__u_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16043,6 +16126,7 @@ define void @s_shuffle_v2f32_v8f32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16371,6 +16455,7 @@ define void @s_shuffle_v2f32_v8f32__8_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16383,6 +16468,7 @@ define void @s_shuffle_v2f32_v8f32__8_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16391,6 +16477,7 @@ define void @s_shuffle_v2f32_v8f32__8_2() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17464,9 +17551,10 @@ define void @s_shuffle_v2f32_v8f32__u_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17476,9 +17564,10 @@ define void @s_shuffle_v2f32_v8f32__u_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17486,6 +17575,7 @@ define void @s_shuffle_v2f32_v8f32__u_4() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17812,9 +17902,10 @@ define void @s_shuffle_v2f32_v8f32__8_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17824,9 +17915,10 @@ define void @s_shuffle_v2f32_v8f32__8_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17834,6 +17926,7 @@ define void @s_shuffle_v2f32_v8f32__8_4() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -18806,6 +18899,7 @@ define void @s_shuffle_v2f32_v8f32__u_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -18818,6 +18912,7 @@ define void @s_shuffle_v2f32_v8f32__u_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -18826,6 +18921,7 @@ define void @s_shuffle_v2f32_v8f32__u_6() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -19154,6 +19250,7 @@ define void @s_shuffle_v2f32_v8f32__8_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -19166,6 +19263,7 @@ define void @s_shuffle_v2f32_v8f32__8_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -19174,6 +19272,7 @@ define void @s_shuffle_v2f32_v8f32__8_6() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20293,6 +20392,7 @@ define void @s_shuffle_v2f32_v8f32__1_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20305,6 +20405,7 @@ define void @s_shuffle_v2f32_v8f32__1_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20313,6 +20414,7 @@ define void @s_shuffle_v2f32_v8f32__1_8() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20377,6 +20479,7 @@ define void @s_shuffle_v2f32_v8f32__3_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20389,6 +20492,7 @@ define void @s_shuffle_v2f32_v8f32__3_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20397,6 +20501,7 @@ define void @s_shuffle_v2f32_v8f32__3_8() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20457,9 +20562,10 @@ define void @s_shuffle_v2f32_v8f32__5_8() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -20469,9 +20575,10 @@ define void @s_shuffle_v2f32_v8f32__5_8() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -20479,6 +20586,7 @@ define void @s_shuffle_v2f32_v8f32__5_8() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20543,6 +20651,7 @@ define void @s_shuffle_v2f32_v8f32__7_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20555,6 +20664,7 @@ define void @s_shuffle_v2f32_v8f32__7_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20563,6 +20673,7 @@ define void @s_shuffle_v2f32_v8f32__7_8() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -21522,6 +21633,7 @@ define void @s_shuffle_v2f32_v8f32__u_10() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -21534,6 +21646,7 @@ define void @s_shuffle_v2f32_v8f32__u_10() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -21542,6 +21655,7 @@ define void @s_shuffle_v2f32_v8f32__u_10() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_10:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -23015,9 +23129,10 @@ define void @s_shuffle_v2f32_v8f32__u_12() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -23027,9 +23142,10 @@ define void @s_shuffle_v2f32_v8f32__u_12() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -23037,6 +23153,7 @@ define void @s_shuffle_v2f32_v8f32__u_12() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_12:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -24442,6 +24559,7 @@ define void @s_shuffle_v2f32_v8f32__u_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -24454,6 +24572,7 @@ define void @s_shuffle_v2f32_v8f32__u_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -24462,6 +24581,7 @@ define void @s_shuffle_v2f32_v8f32__u_14() {
;
; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_14:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll
index 676a521757bd8..4b4f36f60b30a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll
@@ -57,37 +57,41 @@ define void @v_shuffle_v2i32_v2i32__0_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i32_v2i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v2i32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i32_v2i32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -110,37 +114,41 @@ define void @v_shuffle_v2i32_v2i32__2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i32_v2i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v2i32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i32_v2i32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -350,33 +358,37 @@ define void @v_shuffle_v2i32_v2i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i32_v2i32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i32_v2i32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -476,33 +488,37 @@ define void @v_shuffle_v2i32_v2i32__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i32_v2i32__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i32_v2i32__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -723,37 +739,41 @@ define void @v_shuffle_v2i32_v2i32__0_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i32_v2i32__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v2i32__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i32_v2i32__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -1016,6 +1036,7 @@ define void @s_shuffle_v2i32_v2i32__0_u() {
define void @s_shuffle_v2i32_v2i32__1_u() {
; GFX900-LABEL: s_shuffle_v2i32_v2i32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1028,6 +1049,7 @@ define void @s_shuffle_v2i32_v2i32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v2i32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1040,6 +1062,7 @@ define void @s_shuffle_v2i32_v2i32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1072,6 +1095,7 @@ define void @s_shuffle_v2i32_v2i32__2_u() {
define void @s_shuffle_v2i32_v2i32__3_u() {
; GFX900-LABEL: s_shuffle_v2i32_v2i32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1084,6 +1108,7 @@ define void @s_shuffle_v2i32_v2i32__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v2i32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1096,6 +1121,7 @@ define void @s_shuffle_v2i32_v2i32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v2i32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1287,6 +1313,7 @@ define void @s_shuffle_v2i32_v2i32__3_3() {
define void @s_shuffle_v2i32_v2i32__u_0() {
; GFX900-LABEL: s_shuffle_v2i32_v2i32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1299,6 +1326,7 @@ define void @s_shuffle_v2i32_v2i32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v2i32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1311,6 +1339,7 @@ define void @s_shuffle_v2i32_v2i32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v2i32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1392,6 +1421,7 @@ define void @s_shuffle_v2i32_v2i32__1_0() {
define void @s_shuffle_v2i32_v2i32__2_0() {
; GFX900-LABEL: s_shuffle_v2i32_v2i32__2_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1404,6 +1434,7 @@ define void @s_shuffle_v2i32_v2i32__2_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v2i32__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1416,6 +1447,7 @@ define void @s_shuffle_v2i32_v2i32__2_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v2i32__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1626,6 +1658,7 @@ define void @s_shuffle_v2i32_v2i32__0_2() {
define void @s_shuffle_v2i32_v2i32__1_2() {
; GFX900-LABEL: s_shuffle_v2i32_v2i32__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1638,6 +1671,7 @@ define void @s_shuffle_v2i32_v2i32__1_2() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v2i32__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1650,6 +1684,7 @@ define void @s_shuffle_v2i32_v2i32__1_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll
index f65340470feb1..e6e1b6b67bcb6 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2i32_v3i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2i32_v3i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -102,6 +104,7 @@ define void @v_shuffle_v2i32_v3i32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -114,6 +117,7 @@ define void @v_shuffle_v2i32_v3i32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -126,6 +130,7 @@ define void @v_shuffle_v2i32_v3i32__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -166,9 +171,10 @@ define void @v_shuffle_v2i32_v3i32__4_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -178,9 +184,10 @@ define void @v_shuffle_v2i32_v3i32__4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -197,6 +204,7 @@ define void @v_shuffle_v2i32_v3i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -209,6 +217,7 @@ define void @v_shuffle_v2i32_v3i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -221,6 +230,7 @@ define void @v_shuffle_v2i32_v3i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -536,9 +546,10 @@ define void @v_shuffle_v2i32_v3i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,9 +559,10 @@ define void @v_shuffle_v2i32_v3i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -560,9 +572,10 @@ define void @v_shuffle_v2i32_v3i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -706,9 +719,10 @@ define void @v_shuffle_v2i32_v3i32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -718,9 +732,10 @@ define void @v_shuffle_v2i32_v3i32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -730,9 +745,10 @@ define void @v_shuffle_v2i32_v3i32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1066,6 +1082,7 @@ define void @v_shuffle_v2i32_v3i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1078,6 +1095,7 @@ define void @v_shuffle_v2i32_v3i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1236,6 +1254,7 @@ define void @v_shuffle_v2i32_v3i32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1248,6 +1267,7 @@ define void @v_shuffle_v2i32_v3i32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1381,9 +1401,10 @@ define void @v_shuffle_v2i32_v3i32__1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1393,9 +1414,10 @@ define void @v_shuffle_v2i32_v3i32__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1411,6 +1433,7 @@ define void @v_shuffle_v2i32_v3i32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1423,6 +1446,7 @@ define void @v_shuffle_v2i32_v3i32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1435,6 +1459,7 @@ define void @v_shuffle_v2i32_v3i32__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1800,6 +1825,7 @@ define void @v_shuffle_v2i32_v3i32__u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1812,6 +1838,7 @@ define void @v_shuffle_v2i32_v3i32__u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -2129,6 +2156,7 @@ define void @s_shuffle_v2i32_v3i32__0_u() {
define void @s_shuffle_v2i32_v3i32__1_u() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2141,6 +2169,7 @@ define void @s_shuffle_v2i32_v3i32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2153,6 +2182,7 @@ define void @s_shuffle_v2i32_v3i32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2171,6 +2201,7 @@ define void @s_shuffle_v2i32_v3i32__1_u() {
define void @s_shuffle_v2i32_v3i32__2_u() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2183,6 +2214,7 @@ define void @s_shuffle_v2i32_v3i32__2_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2195,6 +2227,7 @@ define void @s_shuffle_v2i32_v3i32__2_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2227,6 +2260,7 @@ define void @s_shuffle_v2i32_v3i32__3_u() {
define void @s_shuffle_v2i32_v3i32__4_u() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2239,6 +2273,7 @@ define void @s_shuffle_v2i32_v3i32__4_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2251,6 +2286,7 @@ define void @s_shuffle_v2i32_v3i32__4_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2270,6 +2306,7 @@ define void @s_shuffle_v2i32_v3i32__4_u() {
define void @s_shuffle_v2i32_v3i32__5_u() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2282,6 +2319,7 @@ define void @s_shuffle_v2i32_v3i32__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2294,6 +2332,7 @@ define void @s_shuffle_v2i32_v3i32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2586,6 +2625,7 @@ define void @s_shuffle_v2i32_v3i32__5_5() {
define void @s_shuffle_v2i32_v3i32__u_0() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2598,6 +2638,7 @@ define void @s_shuffle_v2i32_v3i32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2610,6 +2651,7 @@ define void @s_shuffle_v2i32_v3i32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2736,6 +2778,7 @@ define void @s_shuffle_v2i32_v3i32__2_0() {
define void @s_shuffle_v2i32_v3i32__3_0() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__3_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2748,6 +2791,7 @@ define void @s_shuffle_v2i32_v3i32__3_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__3_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2760,6 +2804,7 @@ define void @s_shuffle_v2i32_v3i32__3_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3041,6 +3086,7 @@ define void @s_shuffle_v2i32_v3i32__4_1() {
define void @s_shuffle_v2i32_v3i32__u_2() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3053,6 +3099,7 @@ define void @s_shuffle_v2i32_v3i32__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3065,6 +3112,7 @@ define void @s_shuffle_v2i32_v3i32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3191,6 +3239,7 @@ define void @s_shuffle_v2i32_v3i32__2_2() {
define void @s_shuffle_v2i32_v3i32__3_2() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__3_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3203,6 +3252,7 @@ define void @s_shuffle_v2i32_v3i32__3_2() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__3_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3215,6 +3265,7 @@ define void @s_shuffle_v2i32_v3i32__3_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3342,6 +3393,7 @@ define void @s_shuffle_v2i32_v3i32__0_3() {
define void @s_shuffle_v2i32_v3i32__1_3() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__1_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3354,6 +3406,7 @@ define void @s_shuffle_v2i32_v3i32__1_3() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__1_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3366,6 +3419,7 @@ define void @s_shuffle_v2i32_v3i32__1_3() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3384,6 +3438,7 @@ define void @s_shuffle_v2i32_v3i32__1_3() {
define void @s_shuffle_v2i32_v3i32__2_3() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__2_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3396,6 +3451,7 @@ define void @s_shuffle_v2i32_v3i32__2_3() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__2_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3408,6 +3464,7 @@ define void @s_shuffle_v2i32_v3i32__2_3() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3743,6 +3800,7 @@ define void @s_shuffle_v2i32_v3i32__4_4() {
define void @s_shuffle_v2i32_v3i32__u_5() {
; GFX900-LABEL: s_shuffle_v2i32_v3i32__u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3755,6 +3813,7 @@ define void @s_shuffle_v2i32_v3i32__u_5() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v3i32__u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3767,6 +3826,7 @@ define void @s_shuffle_v2i32_v3i32__u_5() {
;
; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
index 37df1b6a72e03..42b6563a0c8e8 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2i32_v4i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2i32_v4i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -141,6 +143,7 @@ define void @v_shuffle_v2i32_v4i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -153,6 +156,7 @@ define void @v_shuffle_v2i32_v4i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -165,6 +169,7 @@ define void @v_shuffle_v2i32_v4i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -205,9 +210,10 @@ define void @v_shuffle_v2i32_v4i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -217,9 +223,10 @@ define void @v_shuffle_v2i32_v4i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -276,6 +283,7 @@ define void @v_shuffle_v2i32_v4i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -288,6 +296,7 @@ define void @v_shuffle_v2i32_v4i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -300,6 +309,7 @@ define void @v_shuffle_v2i32_v4i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -707,9 +717,10 @@ define void @v_shuffle_v2i32_v4i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -719,9 +730,10 @@ define void @v_shuffle_v2i32_v4i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -731,9 +743,10 @@ define void @v_shuffle_v2i32_v4i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -920,9 +933,10 @@ define void @v_shuffle_v2i32_v4i32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -932,9 +946,10 @@ define void @v_shuffle_v2i32_v4i32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,9 +959,10 @@ define void @v_shuffle_v2i32_v4i32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1428,6 +1444,7 @@ define void @v_shuffle_v2i32_v4i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1440,6 +1457,7 @@ define void @v_shuffle_v2i32_v4i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -1638,6 +1656,7 @@ define void @v_shuffle_v2i32_v4i32__4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1650,6 +1669,7 @@ define void @v_shuffle_v2i32_v4i32__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2184,9 +2204,10 @@ define void @v_shuffle_v2i32_v4i32__1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2196,9 +2217,10 @@ define void @v_shuffle_v2i32_v4i32__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2253,6 +2275,7 @@ define void @v_shuffle_v2i32_v4i32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2265,6 +2288,7 @@ define void @v_shuffle_v2i32_v4i32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2277,6 +2301,7 @@ define void @v_shuffle_v2i32_v4i32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2781,6 +2806,7 @@ define void @v_shuffle_v2i32_v4i32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2793,6 +2819,7 @@ define void @v_shuffle_v2i32_v4i32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -3582,6 +3609,7 @@ define void @s_shuffle_v2i32_v4i32__0_u() {
define void @s_shuffle_v2i32_v4i32__1_u() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3594,6 +3622,7 @@ define void @s_shuffle_v2i32_v4i32__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3606,6 +3635,7 @@ define void @s_shuffle_v2i32_v4i32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3666,6 +3696,7 @@ define void @s_shuffle_v2i32_v4i32__2_u() {
define void @s_shuffle_v2i32_v4i32__3_u() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3678,6 +3709,7 @@ define void @s_shuffle_v2i32_v4i32__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3690,6 +3722,7 @@ define void @s_shuffle_v2i32_v4i32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3722,6 +3755,7 @@ define void @s_shuffle_v2i32_v4i32__4_u() {
define void @s_shuffle_v2i32_v4i32__5_u() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3734,6 +3768,7 @@ define void @s_shuffle_v2i32_v4i32__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3746,6 +3781,7 @@ define void @s_shuffle_v2i32_v4i32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3808,6 +3844,7 @@ define void @s_shuffle_v2i32_v4i32__6_u() {
define void @s_shuffle_v2i32_v4i32__7_u() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3820,6 +3857,7 @@ define void @s_shuffle_v2i32_v4i32__7_u() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3832,6 +3870,7 @@ define void @s_shuffle_v2i32_v4i32__7_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4225,6 +4264,7 @@ define void @s_shuffle_v2i32_v4i32__7_7() {
define void @s_shuffle_v2i32_v4i32__u_0() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4237,6 +4277,7 @@ define void @s_shuffle_v2i32_v4i32__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4249,6 +4290,7 @@ define void @s_shuffle_v2i32_v4i32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4420,6 +4462,7 @@ define void @s_shuffle_v2i32_v4i32__3_0() {
define void @s_shuffle_v2i32_v4i32__4_0() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__4_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4432,6 +4475,7 @@ define void @s_shuffle_v2i32_v4i32__4_0() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__4_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4444,6 +4488,7 @@ define void @s_shuffle_v2i32_v4i32__4_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4880,6 +4925,7 @@ define void @s_shuffle_v2i32_v4i32__6_1() {
define void @s_shuffle_v2i32_v4i32__u_2() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4892,6 +4938,7 @@ define void @s_shuffle_v2i32_v4i32__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4904,6 +4951,7 @@ define void @s_shuffle_v2i32_v4i32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5075,6 +5123,7 @@ define void @s_shuffle_v2i32_v4i32__3_2() {
define void @s_shuffle_v2i32_v4i32__4_2() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__4_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5087,6 +5136,7 @@ define void @s_shuffle_v2i32_v4i32__4_2() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__4_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5099,6 +5149,7 @@ define void @s_shuffle_v2i32_v4i32__4_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5625,6 +5676,7 @@ define void @s_shuffle_v2i32_v4i32__0_4() {
define void @s_shuffle_v2i32_v4i32__1_4() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__1_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5637,6 +5689,7 @@ define void @s_shuffle_v2i32_v4i32__1_4() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__1_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5649,6 +5702,7 @@ define void @s_shuffle_v2i32_v4i32__1_4() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5709,6 +5763,7 @@ define void @s_shuffle_v2i32_v4i32__2_4() {
define void @s_shuffle_v2i32_v4i32__3_4() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__3_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5721,6 +5776,7 @@ define void @s_shuffle_v2i32_v4i32__3_4() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__3_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5733,6 +5789,7 @@ define void @s_shuffle_v2i32_v4i32__3_4() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -6215,6 +6272,7 @@ define void @s_shuffle_v2i32_v4i32__6_5() {
define void @s_shuffle_v2i32_v4i32__u_6() {
; GFX900-LABEL: s_shuffle_v2i32_v4i32__u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -6227,6 +6285,7 @@ define void @s_shuffle_v2i32_v4i32__u_6() {
;
; GFX90A-LABEL: s_shuffle_v2i32_v4i32__u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -6239,6 +6298,7 @@ define void @s_shuffle_v2i32_v4i32__u_6() {
;
; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
index 94ee1774c2766..6763eb143a4d0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2i32_v8i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2i32_v8i32__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i32> asm "; def $0", "=v"()
@@ -152,6 +154,7 @@ define void @v_shuffle_v2i32_v8i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -164,6 +167,7 @@ define void @v_shuffle_v2i32_v8i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -232,6 +236,7 @@ define void @v_shuffle_v2i32_v8i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -244,6 +249,7 @@ define void @v_shuffle_v2i32_v8i32__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -301,6 +307,7 @@ define void @v_shuffle_v2i32_v8i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -313,6 +320,7 @@ define void @v_shuffle_v2i32_v8i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -325,6 +333,7 @@ define void @v_shuffle_v2i32_v8i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -365,9 +374,10 @@ define void @v_shuffle_v2i32_v8i32__9_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,9 +387,10 @@ define void @v_shuffle_v2i32_v8i32__9_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i32> asm "; def $0", "=v"()
@@ -447,6 +458,7 @@ define void @v_shuffle_v2i32_v8i32__11_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -459,6 +471,7 @@ define void @v_shuffle_v2i32_v8i32__11_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -529,6 +542,7 @@ define void @v_shuffle_v2i32_v8i32__13_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -541,6 +555,7 @@ define void @v_shuffle_v2i32_v8i32__13_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -600,6 +615,7 @@ define void @v_shuffle_v2i32_v8i32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -612,6 +628,7 @@ define void @v_shuffle_v2i32_v8i32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -624,6 +641,7 @@ define void @v_shuffle_v2i32_v8i32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -1419,9 +1437,10 @@ define void @v_shuffle_v2i32_v8i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,9 +1450,10 @@ define void @v_shuffle_v2i32_v8i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1443,9 +1463,10 @@ define void @v_shuffle_v2i32_v8i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i32> asm "; def $0", "=v"()
@@ -1804,9 +1825,10 @@ define void @v_shuffle_v2i32_v8i32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1816,9 +1838,10 @@ define void @v_shuffle_v2i32_v8i32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1828,9 +1851,10 @@ define void @v_shuffle_v2i32_v8i32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i32> asm "; def $0", "=v"()
@@ -2904,6 +2928,7 @@ define void @v_shuffle_v2i32_v8i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -2916,6 +2941,7 @@ define void @v_shuffle_v2i32_v8i32__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -3286,6 +3312,7 @@ define void @v_shuffle_v2i32_v8i32__8_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -3298,6 +3325,7 @@ define void @v_shuffle_v2i32_v8i32__8_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4374,6 +4402,7 @@ define void @v_shuffle_v2i32_v8i32__u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4386,6 +4415,7 @@ define void @v_shuffle_v2i32_v8i32__u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4756,6 +4786,7 @@ define void @v_shuffle_v2i32_v8i32__8_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4768,6 +4799,7 @@ define void @v_shuffle_v2i32_v8i32__8_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -5844,6 +5876,7 @@ define void @v_shuffle_v2i32_v8i32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -5856,6 +5889,7 @@ define void @v_shuffle_v2i32_v8i32__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -6226,6 +6260,7 @@ define void @v_shuffle_v2i32_v8i32__8_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -6238,6 +6273,7 @@ define void @v_shuffle_v2i32_v8i32__8_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7364,9 +7400,10 @@ define void @v_shuffle_v2i32_v8i32__1_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7376,9 +7413,10 @@ define void @v_shuffle_v2i32_v8i32__1_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i32> asm "; def $0", "=v"()
@@ -7444,6 +7482,7 @@ define void @v_shuffle_v2i32_v8i32__3_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7456,6 +7495,7 @@ define void @v_shuffle_v2i32_v8i32__3_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7524,6 +7564,7 @@ define void @v_shuffle_v2i32_v8i32__5_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7536,6 +7577,7 @@ define void @v_shuffle_v2i32_v8i32__5_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7593,6 +7635,7 @@ define void @v_shuffle_v2i32_v8i32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7605,6 +7648,7 @@ define void @v_shuffle_v2i32_v8i32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7617,6 +7661,7 @@ define void @v_shuffle_v2i32_v8i32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -8681,6 +8726,7 @@ define void @v_shuffle_v2i32_v8i32__u_10(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -8693,6 +8739,7 @@ define void @v_shuffle_v2i32_v8i32__u_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -10213,6 +10260,7 @@ define void @v_shuffle_v2i32_v8i32__u_12(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -10225,6 +10273,7 @@ define void @v_shuffle_v2i32_v8i32__u_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -11745,6 +11794,7 @@ define void @v_shuffle_v2i32_v8i32__u_14(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -11757,6 +11807,7 @@ define void @v_shuffle_v2i32_v8i32__u_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -13322,6 +13373,7 @@ define void @s_shuffle_v2i32_v8i32__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13334,6 +13386,7 @@ define void @s_shuffle_v2i32_v8i32__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13342,6 +13395,7 @@ define void @s_shuffle_v2i32_v8i32__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13406,6 +13460,7 @@ define void @s_shuffle_v2i32_v8i32__3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13418,6 +13473,7 @@ define void @s_shuffle_v2i32_v8i32__3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13426,6 +13482,7 @@ define void @s_shuffle_v2i32_v8i32__3_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13486,9 +13543,10 @@ define void @s_shuffle_v2i32_v8i32__5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13498,9 +13556,10 @@ define void @s_shuffle_v2i32_v8i32__5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13508,6 +13567,7 @@ define void @s_shuffle_v2i32_v8i32__5_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13572,6 +13632,7 @@ define void @s_shuffle_v2i32_v8i32__7_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13584,6 +13645,7 @@ define void @s_shuffle_v2i32_v8i32__7_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13592,6 +13654,7 @@ define void @s_shuffle_v2i32_v8i32__7_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13628,6 +13691,7 @@ define void @s_shuffle_v2i32_v8i32__9_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13640,6 +13704,7 @@ define void @s_shuffle_v2i32_v8i32__9_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13648,6 +13713,7 @@ define void @s_shuffle_v2i32_v8i32__9_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13714,6 +13780,7 @@ define void @s_shuffle_v2i32_v8i32__11_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13726,6 +13793,7 @@ define void @s_shuffle_v2i32_v8i32__11_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13734,6 +13802,7 @@ define void @s_shuffle_v2i32_v8i32__11_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13796,9 +13865,10 @@ define void @s_shuffle_v2i32_v8i32__13_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13808,9 +13878,10 @@ define void @s_shuffle_v2i32_v8i32__13_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13818,6 +13889,7 @@ define void @s_shuffle_v2i32_v8i32__13_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13884,6 +13956,7 @@ define void @s_shuffle_v2i32_v8i32__15_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13896,6 +13969,7 @@ define void @s_shuffle_v2i32_v8i32__15_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13904,6 +13978,7 @@ define void @s_shuffle_v2i32_v8i32__15_u() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14679,6 +14754,7 @@ define void @s_shuffle_v2i32_v8i32__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -14691,6 +14767,7 @@ define void @s_shuffle_v2i32_v8i32__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -14699,6 +14776,7 @@ define void @s_shuffle_v2i32_v8i32__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15027,6 +15105,7 @@ define void @s_shuffle_v2i32_v8i32__8_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -15039,6 +15118,7 @@ define void @s_shuffle_v2i32_v8i32__8_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -15047,6 +15127,7 @@ define void @s_shuffle_v2i32_v8i32__8_0() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16023,6 +16104,7 @@ define void @s_shuffle_v2i32_v8i32__u_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16035,6 +16117,7 @@ define void @s_shuffle_v2i32_v8i32__u_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16043,6 +16126,7 @@ define void @s_shuffle_v2i32_v8i32__u_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16371,6 +16455,7 @@ define void @s_shuffle_v2i32_v8i32__8_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16383,6 +16468,7 @@ define void @s_shuffle_v2i32_v8i32__8_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16391,6 +16477,7 @@ define void @s_shuffle_v2i32_v8i32__8_2() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17464,9 +17551,10 @@ define void @s_shuffle_v2i32_v8i32__u_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17476,9 +17564,10 @@ define void @s_shuffle_v2i32_v8i32__u_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17486,6 +17575,7 @@ define void @s_shuffle_v2i32_v8i32__u_4() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17812,9 +17902,10 @@ define void @s_shuffle_v2i32_v8i32__8_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17824,9 +17915,10 @@ define void @s_shuffle_v2i32_v8i32__8_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17834,6 +17926,7 @@ define void @s_shuffle_v2i32_v8i32__8_4() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -18806,6 +18899,7 @@ define void @s_shuffle_v2i32_v8i32__u_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -18818,6 +18912,7 @@ define void @s_shuffle_v2i32_v8i32__u_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -18826,6 +18921,7 @@ define void @s_shuffle_v2i32_v8i32__u_6() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -19154,6 +19250,7 @@ define void @s_shuffle_v2i32_v8i32__8_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -19166,6 +19263,7 @@ define void @s_shuffle_v2i32_v8i32__8_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -19174,6 +19272,7 @@ define void @s_shuffle_v2i32_v8i32__8_6() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20293,6 +20392,7 @@ define void @s_shuffle_v2i32_v8i32__1_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20305,6 +20405,7 @@ define void @s_shuffle_v2i32_v8i32__1_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20313,6 +20414,7 @@ define void @s_shuffle_v2i32_v8i32__1_8() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20377,6 +20479,7 @@ define void @s_shuffle_v2i32_v8i32__3_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20389,6 +20492,7 @@ define void @s_shuffle_v2i32_v8i32__3_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20397,6 +20501,7 @@ define void @s_shuffle_v2i32_v8i32__3_8() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20457,9 +20562,10 @@ define void @s_shuffle_v2i32_v8i32__5_8() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -20469,9 +20575,10 @@ define void @s_shuffle_v2i32_v8i32__5_8() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -20479,6 +20586,7 @@ define void @s_shuffle_v2i32_v8i32__5_8() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20543,6 +20651,7 @@ define void @s_shuffle_v2i32_v8i32__7_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20555,6 +20664,7 @@ define void @s_shuffle_v2i32_v8i32__7_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20563,6 +20673,7 @@ define void @s_shuffle_v2i32_v8i32__7_8() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -21522,6 +21633,7 @@ define void @s_shuffle_v2i32_v8i32__u_10() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -21534,6 +21646,7 @@ define void @s_shuffle_v2i32_v8i32__u_10() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -21542,6 +21655,7 @@ define void @s_shuffle_v2i32_v8i32__u_10() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_10:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -23015,9 +23129,10 @@ define void @s_shuffle_v2i32_v8i32__u_12() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -23027,9 +23142,10 @@ define void @s_shuffle_v2i32_v8i32__u_12() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -23037,6 +23153,7 @@ define void @s_shuffle_v2i32_v8i32__u_12() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_12:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -24442,6 +24559,7 @@ define void @s_shuffle_v2i32_v8i32__u_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -24454,6 +24572,7 @@ define void @s_shuffle_v2i32_v8i32__u_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -24462,6 +24581,7 @@ define void @s_shuffle_v2i32_v8i32__u_14() {
;
; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_14:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
index 51dc9a51ec9d0..344a262a655ba 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v2i64_v2i64__0_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v2i64_v2i64__2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -374,10 +382,11 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -387,10 +396,11 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -400,10 +410,11 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -510,10 +521,11 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -523,10 +535,11 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -536,10 +549,11 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -763,40 +777,44 @@ define void @v_shuffle_v2i64_v2i64__0_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1065,6 +1083,7 @@ define void @s_shuffle_v2i64_v2i64__0_u() {
define void @s_shuffle_v2i64_v2i64__1_u() {
; GFX900-LABEL: s_shuffle_v2i64_v2i64__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1078,6 +1097,7 @@ define void @s_shuffle_v2i64_v2i64__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2i64_v2i64__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1091,6 +1111,7 @@ define void @s_shuffle_v2i64_v2i64__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1124,6 +1145,7 @@ define void @s_shuffle_v2i64_v2i64__2_u() {
define void @s_shuffle_v2i64_v2i64__3_u() {
; GFX900-LABEL: s_shuffle_v2i64_v2i64__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1137,6 +1159,7 @@ define void @s_shuffle_v2i64_v2i64__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2i64_v2i64__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1150,6 +1173,7 @@ define void @s_shuffle_v2i64_v2i64__3_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1358,6 +1382,7 @@ define void @s_shuffle_v2i64_v2i64__3_3() {
define void @s_shuffle_v2i64_v2i64__u_0() {
; GFX900-LABEL: s_shuffle_v2i64_v2i64__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1371,6 +1396,7 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2i64_v2i64__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1384,6 +1410,7 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v2i64__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1473,6 +1500,7 @@ define void @s_shuffle_v2i64_v2i64__1_0() {
define void @s_shuffle_v2i64_v2i64__2_0() {
; GFX900-LABEL: s_shuffle_v2i64_v2i64__2_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1486,6 +1514,7 @@ define void @s_shuffle_v2i64_v2i64__2_0() {
;
; GFX90A-LABEL: s_shuffle_v2i64_v2i64__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1499,6 +1528,7 @@ define void @s_shuffle_v2i64_v2i64__2_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v2i64__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1711,6 +1741,7 @@ define void @s_shuffle_v2i64_v2i64__0_2() {
define void @s_shuffle_v2i64_v2i64__1_2() {
; GFX900-LABEL: s_shuffle_v2i64_v2i64__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1724,6 +1755,7 @@ define void @s_shuffle_v2i64_v2i64__1_2() {
;
; GFX90A-LABEL: s_shuffle_v2i64_v2i64__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1737,6 +1769,7 @@ define void @s_shuffle_v2i64_v2i64__1_2() {
;
; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
index bc8a56a30d8f9..1f9333b146f1c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
@@ -100,6 +100,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -113,6 +114,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -126,6 +128,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -196,6 +199,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -209,6 +213,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -222,6 +227,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -560,10 +566,11 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -573,10 +580,11 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -586,10 +594,11 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -746,10 +755,11 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -759,10 +769,11 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -772,10 +783,11 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1462,6 +1474,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -1475,6 +1488,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -1488,6 +1502,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -2206,6 +2221,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -2219,6 +2235,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -2228,6 +2245,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2249,8 +2267,11 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2260,8 +2281,11 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2269,6 +2293,7 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2306,6 +2331,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -2319,6 +2345,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -2328,6 +2355,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2350,8 +2378,11 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2361,8 +2392,11 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2370,6 +2404,7 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2681,6 +2716,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2694,6 +2730,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2703,6 +2740,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2843,6 +2881,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2856,6 +2895,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2865,6 +2905,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3160,10 +3201,11 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3173,10 +3215,11 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3184,6 +3227,7 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3326,10 +3370,11 @@ define void @s_shuffle_v2i64_v3i64__3_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3339,10 +3384,11 @@ define void @s_shuffle_v2i64_v3i64__3_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3350,6 +3396,7 @@ define void @s_shuffle_v2i64_v3i64__3_2() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3488,6 +3535,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3501,6 +3549,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3510,6 +3559,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3531,8 +3581,11 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3542,8 +3595,11 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3551,6 +3607,7 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3905,10 +3962,11 @@ define void @s_shuffle_v2i64_v3i64__u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3918,10 +3976,11 @@ define void @s_shuffle_v2i64_v3i64__u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3929,6 +3988,7 @@ define void @s_shuffle_v2i64_v3i64__u_5() {
;
; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
index dd42a1dd44320..e52326bbd6353 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
@@ -139,6 +139,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -152,6 +153,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -165,6 +167,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -275,6 +278,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -288,6 +292,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -301,6 +306,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -741,10 +747,11 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,10 +761,11 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -767,10 +775,11 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -972,10 +981,11 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -985,10 +995,11 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -998,10 +1009,11 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2349,6 +2361,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -2362,6 +2375,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -2375,6 +2389,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -3739,6 +3754,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3752,6 +3768,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3761,6 +3778,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3822,10 +3840,11 @@ define void @s_shuffle_v2i64_v4i64__3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3835,10 +3854,11 @@ define void @s_shuffle_v2i64_v4i64__3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3846,6 +3866,7 @@ define void @s_shuffle_v2i64_v4i64__3_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3883,6 +3904,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3896,6 +3918,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3905,6 +3928,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3968,10 +3992,11 @@ define void @s_shuffle_v2i64_v4i64__7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3981,10 +4006,11 @@ define void @s_shuffle_v2i64_v4i64__7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3992,6 +4018,7 @@ define void @s_shuffle_v2i64_v4i64__7_u() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -4394,6 +4421,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4407,6 +4435,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4416,6 +4445,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -4579,6 +4609,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4592,6 +4623,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4601,6 +4633,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5028,10 +5061,11 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5041,10 +5075,11 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5052,6 +5087,7 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5213,10 +5249,11 @@ define void @s_shuffle_v2i64_v4i64__4_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5226,10 +5263,11 @@ define void @s_shuffle_v2i64_v4i64__4_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5237,6 +5275,7 @@ define void @s_shuffle_v2i64_v4i64__4_2() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5720,6 +5759,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -5733,6 +5773,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -5742,6 +5783,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5803,10 +5845,11 @@ define void @s_shuffle_v2i64_v4i64__3_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5816,10 +5859,11 @@ define void @s_shuffle_v2i64_v4i64__3_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5827,6 +5871,7 @@ define void @s_shuffle_v2i64_v4i64__3_4() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -6277,10 +6322,11 @@ define void @s_shuffle_v2i64_v4i64__u_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -6290,10 +6336,11 @@ define void @s_shuffle_v2i64_v4i64__u_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -6301,6 +6348,7 @@ define void @s_shuffle_v2i64_v4i64__u_6() {
;
; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index 7ee7c83e0122d..819e7876ed15e 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -295,10 +295,11 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v14
; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -308,6 +309,7 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v14
; GFX90A-NEXT: v_mov_b32_e32 v1, v15
@@ -321,6 +323,7 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v14
; GFX942-NEXT: v_mov_b32_e32 v1, v15
@@ -591,10 +594,11 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v14
; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -604,6 +608,7 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v14
; GFX90A-NEXT: v_mov_b32_e32 v1, v15
@@ -617,6 +622,7 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v14
; GFX942-NEXT: v_mov_b32_e32 v1, v15
@@ -1465,10 +1471,11 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1478,10 +1485,11 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1491,10 +1499,11 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1876,10 +1885,11 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1889,10 +1899,11 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1902,10 +1913,11 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -7917,10 +7929,11 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v14
; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7930,6 +7943,7 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v14
; GFX90A-NEXT: v_mov_b32_e32 v1, v15
@@ -7943,6 +7957,7 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v14
; GFX942-NEXT: v_mov_b32_e32 v1, v15
@@ -13931,6 +13946,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13944,6 +13960,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13957,6 +13974,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -14014,10 +14032,11 @@ define void @s_shuffle_v2i64_v8i64__3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -14027,10 +14046,11 @@ define void @s_shuffle_v2i64_v8i64__3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -14042,6 +14062,7 @@ define void @s_shuffle_v2i64_v8i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -14099,44 +14120,19 @@ define void @s_shuffle_v2i64_v8i64__4_u() {
}
define void @s_shuffle_v2i64_v8i64__5_u() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 poison>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -14195,6 +14191,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
@@ -14208,6 +14205,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
@@ -14221,6 +14219,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -14254,6 +14253,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -14267,6 +14267,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -14280,6 +14281,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -14339,10 +14341,11 @@ define void @s_shuffle_v2i64_v8i64__11_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -14352,10 +14355,11 @@ define void @s_shuffle_v2i64_v8i64__11_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -14367,6 +14371,7 @@ define void @s_shuffle_v2i64_v8i64__11_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -14426,44 +14431,19 @@ define void @s_shuffle_v2i64_v8i64__12_u() {
}
define void @s_shuffle_v2i64_v8i64__13_u() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 poison>
@@ -14524,6 +14504,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
@@ -14537,6 +14518,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
@@ -14550,6 +14532,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -15560,6 +15543,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -15573,6 +15557,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -15586,6 +15571,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -15947,6 +15933,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -15960,6 +15947,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -15973,6 +15961,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -17127,10 +17116,11 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -17140,10 +17130,11 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -17155,6 +17146,7 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -17514,10 +17506,11 @@ define void @s_shuffle_v2i64_v8i64__8_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -17527,10 +17520,11 @@ define void @s_shuffle_v2i64_v8i64__8_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -17542,6 +17536,7 @@ define void @s_shuffle_v2i64_v8i64__8_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -18712,44 +18707,19 @@ define void @s_shuffle_v2i64_v8i64__14_3() {
}
define void @s_shuffle_v2i64_v8i64__u_4() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__u_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__u_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__u_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18979,44 +18949,19 @@ define void @s_shuffle_v2i64_v8i64__7_4() {
}
define void @s_shuffle_v2i64_v8i64__8_4() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 8, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -20419,6 +20364,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -20432,6 +20378,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -20445,6 +20392,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -20806,6 +20754,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -20819,6 +20768,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -20832,6 +20782,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -22599,6 +22550,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22612,6 +22564,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22625,6 +22578,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -22682,10 +22636,11 @@ define void @s_shuffle_v2i64_v8i64__3_8() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -22695,10 +22650,11 @@ define void @s_shuffle_v2i64_v8i64__3_8() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -22710,6 +22666,7 @@ define void @s_shuffle_v2i64_v8i64__3_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -22767,44 +22724,19 @@ define void @s_shuffle_v2i64_v8i64__4_8() {
}
define void @s_shuffle_v2i64_v8i64__5_8() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_8:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_8:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_8:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 8>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -22863,6 +22795,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
@@ -22876,6 +22809,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
@@ -22889,6 +22823,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -24077,10 +24012,11 @@ define void @s_shuffle_v2i64_v8i64__u_10() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ; def s[8:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -24090,10 +24026,11 @@ define void @s_shuffle_v2i64_v8i64__u_10() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ; def s[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -24105,6 +24042,7 @@ define void @s_shuffle_v2i64_v8i64__u_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -25933,44 +25871,19 @@ define void @s_shuffle_v2i64_v8i64__14_11() {
}
define void @s_shuffle_v2i64_v8i64__u_12() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__u_12:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__u_12:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_12:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v8i64__u_12:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 poison, i32 12>
@@ -27792,6 +27705,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -27805,6 +27719,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -27818,6 +27733,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
index 7f8f2dbbb09a1..442a0026bb890 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v2p0_v2p0__0_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v2p0_v2p0__2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -374,10 +382,11 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -387,10 +396,11 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -400,10 +410,11 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -510,10 +521,11 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -523,10 +535,11 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -536,10 +549,11 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -763,40 +777,44 @@ define void @v_shuffle_v2p0_v2p0__0_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1065,6 +1083,7 @@ define void @s_shuffle_v2p0_v2p0__0_u() {
define void @s_shuffle_v2p0_v2p0__1_u() {
; GFX900-LABEL: s_shuffle_v2p0_v2p0__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1078,6 +1097,7 @@ define void @s_shuffle_v2p0_v2p0__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2p0_v2p0__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1091,6 +1111,7 @@ define void @s_shuffle_v2p0_v2p0__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1124,6 +1145,7 @@ define void @s_shuffle_v2p0_v2p0__2_u() {
define void @s_shuffle_v2p0_v2p0__3_u() {
; GFX900-LABEL: s_shuffle_v2p0_v2p0__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1137,6 +1159,7 @@ define void @s_shuffle_v2p0_v2p0__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2p0_v2p0__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1150,6 +1173,7 @@ define void @s_shuffle_v2p0_v2p0__3_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1358,6 +1382,7 @@ define void @s_shuffle_v2p0_v2p0__3_3() {
define void @s_shuffle_v2p0_v2p0__u_0() {
; GFX900-LABEL: s_shuffle_v2p0_v2p0__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1371,6 +1396,7 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2p0_v2p0__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1384,6 +1410,7 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v2p0__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1473,6 +1500,7 @@ define void @s_shuffle_v2p0_v2p0__1_0() {
define void @s_shuffle_v2p0_v2p0__2_0() {
; GFX900-LABEL: s_shuffle_v2p0_v2p0__2_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1486,6 +1514,7 @@ define void @s_shuffle_v2p0_v2p0__2_0() {
;
; GFX90A-LABEL: s_shuffle_v2p0_v2p0__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1499,6 +1528,7 @@ define void @s_shuffle_v2p0_v2p0__2_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v2p0__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -1711,6 +1741,7 @@ define void @s_shuffle_v2p0_v2p0__0_2() {
define void @s_shuffle_v2p0_v2p0__1_2() {
; GFX900-LABEL: s_shuffle_v2p0_v2p0__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -1724,6 +1755,7 @@ define void @s_shuffle_v2p0_v2p0__1_2() {
;
; GFX90A-LABEL: s_shuffle_v2p0_v2p0__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -1737,6 +1769,7 @@ define void @s_shuffle_v2p0_v2p0__1_2() {
;
; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
index 27a6cf11c4cb1..2455b7be778b3 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
@@ -100,6 +100,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -113,6 +114,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -126,6 +128,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -196,6 +199,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -209,6 +213,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -222,6 +227,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -560,10 +566,11 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -573,10 +580,11 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -586,10 +594,11 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -746,10 +755,11 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -759,10 +769,11 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -772,10 +783,11 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1462,6 +1474,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -1475,6 +1488,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -1488,6 +1502,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -2206,6 +2221,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -2219,6 +2235,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -2228,6 +2245,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2249,8 +2267,11 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2260,8 +2281,11 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2269,6 +2293,7 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2306,6 +2331,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -2319,6 +2345,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -2328,6 +2355,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2350,8 +2378,11 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2361,8 +2392,11 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2370,6 +2404,7 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2681,6 +2716,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2694,6 +2730,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2703,6 +2740,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -2843,6 +2881,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2856,6 +2895,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2865,6 +2905,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3160,10 +3201,11 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3173,10 +3215,11 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3184,6 +3227,7 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3326,10 +3370,11 @@ define void @s_shuffle_v2p0_v3p0__3_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3339,10 +3384,11 @@ define void @s_shuffle_v2p0_v3p0__3_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3350,6 +3396,7 @@ define void @s_shuffle_v2p0_v3p0__3_2() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3488,6 +3535,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3501,6 +3549,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3510,6 +3559,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3531,8 +3581,11 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3542,8 +3595,11 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3551,6 +3607,7 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -3905,10 +3962,11 @@ define void @s_shuffle_v2p0_v3p0__u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3918,10 +3976,11 @@ define void @s_shuffle_v2p0_v3p0__u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3929,6 +3988,7 @@ define void @s_shuffle_v2p0_v3p0__u_5() {
;
; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
index ae31524ebaa7f..59caa3e76c000 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
@@ -139,6 +139,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -152,6 +153,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -165,6 +167,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -275,6 +278,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -288,6 +292,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -301,6 +306,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -741,10 +747,11 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,10 +761,11 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -767,10 +775,11 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -972,10 +981,11 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -985,10 +995,11 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -998,10 +1009,11 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2349,6 +2361,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -2362,6 +2375,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -2375,6 +2389,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -3739,6 +3754,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3752,6 +3768,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3761,6 +3778,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3822,10 +3840,11 @@ define void @s_shuffle_v2p0_v4p0__3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3835,10 +3854,11 @@ define void @s_shuffle_v2p0_v4p0__3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3846,6 +3866,7 @@ define void @s_shuffle_v2p0_v4p0__3_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3883,6 +3904,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3896,6 +3918,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3905,6 +3928,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -3968,10 +3992,11 @@ define void @s_shuffle_v2p0_v4p0__7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3981,10 +4006,11 @@ define void @s_shuffle_v2p0_v4p0__7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3992,6 +4018,7 @@ define void @s_shuffle_v2p0_v4p0__7_u() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -4394,6 +4421,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4407,6 +4435,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4416,6 +4445,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -4579,6 +4609,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4592,6 +4623,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4601,6 +4633,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5028,10 +5061,11 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5041,10 +5075,11 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5052,6 +5087,7 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5213,10 +5249,11 @@ define void @s_shuffle_v2p0_v4p0__4_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5226,10 +5263,11 @@ define void @s_shuffle_v2p0_v4p0__4_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5237,6 +5275,7 @@ define void @s_shuffle_v2p0_v4p0__4_2() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5720,6 +5759,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -5733,6 +5773,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -5742,6 +5783,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -5803,10 +5845,11 @@ define void @s_shuffle_v2p0_v4p0__3_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -5816,10 +5859,11 @@ define void @s_shuffle_v2p0_v4p0__3_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -5827,6 +5871,7 @@ define void @s_shuffle_v2p0_v4p0__3_4() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -6277,10 +6322,11 @@ define void @s_shuffle_v2p0_v4p0__u_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s8
-; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -6290,10 +6336,11 @@ define void @s_shuffle_v2p0_v4p0__u_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s8
-; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -6301,6 +6348,7 @@ define void @s_shuffle_v2p0_v4p0__u_6() {
;
; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll
index 299dfba482953..9fc76d404b0bb 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll
@@ -57,37 +57,41 @@ define void @v_shuffle_v2p3_v2p3__0_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p3_v2p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v2p3__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p3_v2p3__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -110,37 +114,41 @@ define void @v_shuffle_v2p3_v2p3__2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p3_v2p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v2p3__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p3_v2p3__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -350,33 +358,37 @@ define void @v_shuffle_v2p3_v2p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p3_v2p3__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p3_v2p3__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -476,33 +488,37 @@ define void @v_shuffle_v2p3_v2p3__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p3_v2p3__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p3_v2p3__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -723,37 +739,41 @@ define void @v_shuffle_v2p3_v2p3__0_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v2p3_v2p3__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v2p3__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p3_v2p3__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1016,6 +1036,7 @@ define void @s_shuffle_v2p3_v2p3__0_u() {
define void @s_shuffle_v2p3_v2p3__1_u() {
; GFX900-LABEL: s_shuffle_v2p3_v2p3__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1028,6 +1049,7 @@ define void @s_shuffle_v2p3_v2p3__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v2p3__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1040,6 +1062,7 @@ define void @s_shuffle_v2p3_v2p3__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1072,6 +1095,7 @@ define void @s_shuffle_v2p3_v2p3__2_u() {
define void @s_shuffle_v2p3_v2p3__3_u() {
; GFX900-LABEL: s_shuffle_v2p3_v2p3__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1084,6 +1108,7 @@ define void @s_shuffle_v2p3_v2p3__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v2p3__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1096,6 +1121,7 @@ define void @s_shuffle_v2p3_v2p3__3_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v2p3__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1287,6 +1313,7 @@ define void @s_shuffle_v2p3_v2p3__3_3() {
define void @s_shuffle_v2p3_v2p3__u_0() {
; GFX900-LABEL: s_shuffle_v2p3_v2p3__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1299,6 +1326,7 @@ define void @s_shuffle_v2p3_v2p3__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v2p3__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1311,6 +1339,7 @@ define void @s_shuffle_v2p3_v2p3__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v2p3__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1392,6 +1421,7 @@ define void @s_shuffle_v2p3_v2p3__1_0() {
define void @s_shuffle_v2p3_v2p3__2_0() {
; GFX900-LABEL: s_shuffle_v2p3_v2p3__2_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1404,6 +1434,7 @@ define void @s_shuffle_v2p3_v2p3__2_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v2p3__2_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1416,6 +1447,7 @@ define void @s_shuffle_v2p3_v2p3__2_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v2p3__2_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -1626,6 +1658,7 @@ define void @s_shuffle_v2p3_v2p3__0_2() {
define void @s_shuffle_v2p3_v2p3__1_2() {
; GFX900-LABEL: s_shuffle_v2p3_v2p3__1_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -1638,6 +1671,7 @@ define void @s_shuffle_v2p3_v2p3__1_2() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v2p3__1_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -1650,6 +1684,7 @@ define void @s_shuffle_v2p3_v2p3__1_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll
index 13e3d94c35446..72efe2990ce82 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2p3_v3p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2p3_v3p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -102,6 +104,7 @@ define void @v_shuffle_v2p3_v3p3__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -114,6 +117,7 @@ define void @v_shuffle_v2p3_v3p3__2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -126,6 +130,7 @@ define void @v_shuffle_v2p3_v3p3__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -166,9 +171,10 @@ define void @v_shuffle_v2p3_v3p3__4_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -178,9 +184,10 @@ define void @v_shuffle_v2p3_v3p3__4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -197,6 +204,7 @@ define void @v_shuffle_v2p3_v3p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -209,6 +217,7 @@ define void @v_shuffle_v2p3_v3p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -221,6 +230,7 @@ define void @v_shuffle_v2p3_v3p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -536,9 +546,10 @@ define void @v_shuffle_v2p3_v3p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,9 +559,10 @@ define void @v_shuffle_v2p3_v3p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -560,9 +572,10 @@ define void @v_shuffle_v2p3_v3p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -706,9 +719,10 @@ define void @v_shuffle_v2p3_v3p3__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -718,9 +732,10 @@ define void @v_shuffle_v2p3_v3p3__3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -730,9 +745,10 @@ define void @v_shuffle_v2p3_v3p3__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1066,6 +1082,7 @@ define void @v_shuffle_v2p3_v3p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1078,6 +1095,7 @@ define void @v_shuffle_v2p3_v3p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1236,6 +1254,7 @@ define void @v_shuffle_v2p3_v3p3__3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1248,6 +1267,7 @@ define void @v_shuffle_v2p3_v3p3__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1381,9 +1401,10 @@ define void @v_shuffle_v2p3_v3p3__1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1393,9 +1414,10 @@ define void @v_shuffle_v2p3_v3p3__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1411,6 +1433,7 @@ define void @v_shuffle_v2p3_v3p3__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1423,6 +1446,7 @@ define void @v_shuffle_v2p3_v3p3__2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1435,6 +1459,7 @@ define void @v_shuffle_v2p3_v3p3__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -1800,6 +1825,7 @@ define void @v_shuffle_v2p3_v3p3__u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
@@ -1812,6 +1838,7 @@ define void @v_shuffle_v2p3_v3p3__u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
@@ -2129,6 +2156,7 @@ define void @s_shuffle_v2p3_v3p3__0_u() {
define void @s_shuffle_v2p3_v3p3__1_u() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2141,6 +2169,7 @@ define void @s_shuffle_v2p3_v3p3__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2153,6 +2182,7 @@ define void @s_shuffle_v2p3_v3p3__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2171,6 +2201,7 @@ define void @s_shuffle_v2p3_v3p3__1_u() {
define void @s_shuffle_v2p3_v3p3__2_u() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2183,6 +2214,7 @@ define void @s_shuffle_v2p3_v3p3__2_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2195,6 +2227,7 @@ define void @s_shuffle_v2p3_v3p3__2_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2227,6 +2260,7 @@ define void @s_shuffle_v2p3_v3p3__3_u() {
define void @s_shuffle_v2p3_v3p3__4_u() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2239,6 +2273,7 @@ define void @s_shuffle_v2p3_v3p3__4_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2251,6 +2286,7 @@ define void @s_shuffle_v2p3_v3p3__4_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2270,6 +2306,7 @@ define void @s_shuffle_v2p3_v3p3__4_u() {
define void @s_shuffle_v2p3_v3p3__5_u() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2282,6 +2319,7 @@ define void @s_shuffle_v2p3_v3p3__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2294,6 +2332,7 @@ define void @s_shuffle_v2p3_v3p3__5_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2586,6 +2625,7 @@ define void @s_shuffle_v2p3_v3p3__5_5() {
define void @s_shuffle_v2p3_v3p3__u_0() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2598,6 +2638,7 @@ define void @s_shuffle_v2p3_v3p3__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2610,6 +2651,7 @@ define void @s_shuffle_v2p3_v3p3__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -2736,6 +2778,7 @@ define void @s_shuffle_v2p3_v3p3__2_0() {
define void @s_shuffle_v2p3_v3p3__3_0() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__3_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -2748,6 +2791,7 @@ define void @s_shuffle_v2p3_v3p3__3_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__3_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -2760,6 +2804,7 @@ define void @s_shuffle_v2p3_v3p3__3_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3041,6 +3086,7 @@ define void @s_shuffle_v2p3_v3p3__4_1() {
define void @s_shuffle_v2p3_v3p3__u_2() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3053,6 +3099,7 @@ define void @s_shuffle_v2p3_v3p3__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3065,6 +3112,7 @@ define void @s_shuffle_v2p3_v3p3__u_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3191,6 +3239,7 @@ define void @s_shuffle_v2p3_v3p3__2_2() {
define void @s_shuffle_v2p3_v3p3__3_2() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__3_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3203,6 +3252,7 @@ define void @s_shuffle_v2p3_v3p3__3_2() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__3_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3215,6 +3265,7 @@ define void @s_shuffle_v2p3_v3p3__3_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3342,6 +3393,7 @@ define void @s_shuffle_v2p3_v3p3__0_3() {
define void @s_shuffle_v2p3_v3p3__1_3() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__1_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3354,6 +3406,7 @@ define void @s_shuffle_v2p3_v3p3__1_3() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__1_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3366,6 +3419,7 @@ define void @s_shuffle_v2p3_v3p3__1_3() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3384,6 +3438,7 @@ define void @s_shuffle_v2p3_v3p3__1_3() {
define void @s_shuffle_v2p3_v3p3__2_3() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__2_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3396,6 +3451,7 @@ define void @s_shuffle_v2p3_v3p3__2_3() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__2_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3408,6 +3464,7 @@ define void @s_shuffle_v2p3_v3p3__2_3() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -3743,6 +3800,7 @@ define void @s_shuffle_v2p3_v3p3__4_4() {
define void @s_shuffle_v2p3_v3p3__u_5() {
; GFX900-LABEL: s_shuffle_v2p3_v3p3__u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -3755,6 +3813,7 @@ define void @s_shuffle_v2p3_v3p3__u_5() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v3p3__u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -3767,6 +3826,7 @@ define void @s_shuffle_v2p3_v3p3__u_5() {
;
; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
index a9085502c7358..bf3bfaf9e9521 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2p3_v4p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2p3_v4p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -141,6 +143,7 @@ define void @v_shuffle_v2p3_v4p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -153,6 +156,7 @@ define void @v_shuffle_v2p3_v4p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -165,6 +169,7 @@ define void @v_shuffle_v2p3_v4p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -205,9 +210,10 @@ define void @v_shuffle_v2p3_v4p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -217,9 +223,10 @@ define void @v_shuffle_v2p3_v4p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -276,6 +283,7 @@ define void @v_shuffle_v2p3_v4p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -288,6 +296,7 @@ define void @v_shuffle_v2p3_v4p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -300,6 +309,7 @@ define void @v_shuffle_v2p3_v4p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -707,9 +717,10 @@ define void @v_shuffle_v2p3_v4p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -719,9 +730,10 @@ define void @v_shuffle_v2p3_v4p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -731,9 +743,10 @@ define void @v_shuffle_v2p3_v4p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -920,9 +933,10 @@ define void @v_shuffle_v2p3_v4p3__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -932,9 +946,10 @@ define void @v_shuffle_v2p3_v4p3__4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,9 +959,10 @@ define void @v_shuffle_v2p3_v4p3__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1428,6 +1444,7 @@ define void @v_shuffle_v2p3_v4p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1440,6 +1457,7 @@ define void @v_shuffle_v2p3_v4p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -1638,6 +1656,7 @@ define void @v_shuffle_v2p3_v4p3__4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -1650,6 +1669,7 @@ define void @v_shuffle_v2p3_v4p3__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2184,9 +2204,10 @@ define void @v_shuffle_v2p3_v4p3__1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2196,9 +2217,10 @@ define void @v_shuffle_v2p3_v4p3__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2253,6 +2275,7 @@ define void @v_shuffle_v2p3_v4p3__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2265,6 +2288,7 @@ define void @v_shuffle_v2p3_v4p3__3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2277,6 +2301,7 @@ define void @v_shuffle_v2p3_v4p3__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -2781,6 +2806,7 @@ define void @v_shuffle_v2p3_v4p3__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
@@ -2793,6 +2819,7 @@ define void @v_shuffle_v2p3_v4p3__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -3582,6 +3609,7 @@ define void @s_shuffle_v2p3_v4p3__0_u() {
define void @s_shuffle_v2p3_v4p3__1_u() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3594,6 +3622,7 @@ define void @s_shuffle_v2p3_v4p3__1_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3606,6 +3635,7 @@ define void @s_shuffle_v2p3_v4p3__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3666,6 +3696,7 @@ define void @s_shuffle_v2p3_v4p3__2_u() {
define void @s_shuffle_v2p3_v4p3__3_u() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3678,6 +3709,7 @@ define void @s_shuffle_v2p3_v4p3__3_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3690,6 +3722,7 @@ define void @s_shuffle_v2p3_v4p3__3_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3722,6 +3755,7 @@ define void @s_shuffle_v2p3_v4p3__4_u() {
define void @s_shuffle_v2p3_v4p3__5_u() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3734,6 +3768,7 @@ define void @s_shuffle_v2p3_v4p3__5_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3746,6 +3781,7 @@ define void @s_shuffle_v2p3_v4p3__5_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3808,6 +3844,7 @@ define void @s_shuffle_v2p3_v4p3__6_u() {
define void @s_shuffle_v2p3_v4p3__7_u() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3820,6 +3857,7 @@ define void @s_shuffle_v2p3_v4p3__7_u() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3832,6 +3870,7 @@ define void @s_shuffle_v2p3_v4p3__7_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4225,6 +4264,7 @@ define void @s_shuffle_v2p3_v4p3__7_7() {
define void @s_shuffle_v2p3_v4p3__u_0() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4237,6 +4277,7 @@ define void @s_shuffle_v2p3_v4p3__u_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4249,6 +4290,7 @@ define void @s_shuffle_v2p3_v4p3__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4420,6 +4462,7 @@ define void @s_shuffle_v2p3_v4p3__3_0() {
define void @s_shuffle_v2p3_v4p3__4_0() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__4_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4432,6 +4475,7 @@ define void @s_shuffle_v2p3_v4p3__4_0() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__4_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4444,6 +4488,7 @@ define void @s_shuffle_v2p3_v4p3__4_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4880,6 +4925,7 @@ define void @s_shuffle_v2p3_v4p3__6_1() {
define void @s_shuffle_v2p3_v4p3__u_2() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4892,6 +4938,7 @@ define void @s_shuffle_v2p3_v4p3__u_2() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4904,6 +4951,7 @@ define void @s_shuffle_v2p3_v4p3__u_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5075,6 +5123,7 @@ define void @s_shuffle_v2p3_v4p3__3_2() {
define void @s_shuffle_v2p3_v4p3__4_2() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__4_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5087,6 +5136,7 @@ define void @s_shuffle_v2p3_v4p3__4_2() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__4_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5099,6 +5149,7 @@ define void @s_shuffle_v2p3_v4p3__4_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5625,6 +5676,7 @@ define void @s_shuffle_v2p3_v4p3__0_4() {
define void @s_shuffle_v2p3_v4p3__1_4() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__1_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5637,6 +5689,7 @@ define void @s_shuffle_v2p3_v4p3__1_4() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__1_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5649,6 +5702,7 @@ define void @s_shuffle_v2p3_v4p3__1_4() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5709,6 +5763,7 @@ define void @s_shuffle_v2p3_v4p3__2_4() {
define void @s_shuffle_v2p3_v4p3__3_4() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__3_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5721,6 +5776,7 @@ define void @s_shuffle_v2p3_v4p3__3_4() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__3_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5733,6 +5789,7 @@ define void @s_shuffle_v2p3_v4p3__3_4() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -6215,6 +6272,7 @@ define void @s_shuffle_v2p3_v4p3__6_5() {
define void @s_shuffle_v2p3_v4p3__u_6() {
; GFX900-LABEL: s_shuffle_v2p3_v4p3__u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -6227,6 +6285,7 @@ define void @s_shuffle_v2p3_v4p3__u_6() {
;
; GFX90A-LABEL: s_shuffle_v2p3_v4p3__u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -6239,6 +6298,7 @@ define void @s_shuffle_v2p3_v4p3__u_6() {
;
; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
index 9174e92cd9c82..8bf6cd54b5d0f 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v2p3_v8p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v2p3_v8p3__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -152,6 +154,7 @@ define void @v_shuffle_v2p3_v8p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -164,6 +167,7 @@ define void @v_shuffle_v2p3_v8p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -232,6 +236,7 @@ define void @v_shuffle_v2p3_v8p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -244,6 +249,7 @@ define void @v_shuffle_v2p3_v8p3__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -301,6 +307,7 @@ define void @v_shuffle_v2p3_v8p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -313,6 +320,7 @@ define void @v_shuffle_v2p3_v8p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -325,6 +333,7 @@ define void @v_shuffle_v2p3_v8p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -365,9 +374,10 @@ define void @v_shuffle_v2p3_v8p3__9_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,9 +387,10 @@ define void @v_shuffle_v2p3_v8p3__9_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -447,6 +458,7 @@ define void @v_shuffle_v2p3_v8p3__11_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -459,6 +471,7 @@ define void @v_shuffle_v2p3_v8p3__11_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -529,6 +542,7 @@ define void @v_shuffle_v2p3_v8p3__13_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -541,6 +555,7 @@ define void @v_shuffle_v2p3_v8p3__13_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -600,6 +615,7 @@ define void @v_shuffle_v2p3_v8p3__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -612,6 +628,7 @@ define void @v_shuffle_v2p3_v8p3__15_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -624,6 +641,7 @@ define void @v_shuffle_v2p3_v8p3__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -1419,9 +1437,10 @@ define void @v_shuffle_v2p3_v8p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,9 +1450,10 @@ define void @v_shuffle_v2p3_v8p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1443,9 +1463,10 @@ define void @v_shuffle_v2p3_v8p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1804,9 +1825,10 @@ define void @v_shuffle_v2p3_v8p3__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1816,9 +1838,10 @@ define void @v_shuffle_v2p3_v8p3__8_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1828,9 +1851,10 @@ define void @v_shuffle_v2p3_v8p3__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2904,6 +2928,7 @@ define void @v_shuffle_v2p3_v8p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -2916,6 +2941,7 @@ define void @v_shuffle_v2p3_v8p3__u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -3286,6 +3312,7 @@ define void @v_shuffle_v2p3_v8p3__8_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -3298,6 +3325,7 @@ define void @v_shuffle_v2p3_v8p3__8_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4374,6 +4402,7 @@ define void @v_shuffle_v2p3_v8p3__u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4386,6 +4415,7 @@ define void @v_shuffle_v2p3_v8p3__u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -4756,6 +4786,7 @@ define void @v_shuffle_v2p3_v8p3__8_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -4768,6 +4799,7 @@ define void @v_shuffle_v2p3_v8p3__8_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -5844,6 +5876,7 @@ define void @v_shuffle_v2p3_v8p3__u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -5856,6 +5889,7 @@ define void @v_shuffle_v2p3_v8p3__u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -6226,6 +6260,7 @@ define void @v_shuffle_v2p3_v8p3__8_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -6238,6 +6273,7 @@ define void @v_shuffle_v2p3_v8p3__8_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7364,9 +7400,10 @@ define void @v_shuffle_v2p3_v8p3__1_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7376,9 +7413,10 @@ define void @v_shuffle_v2p3_v8p3__1_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7444,6 +7482,7 @@ define void @v_shuffle_v2p3_v8p3__3_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7456,6 +7495,7 @@ define void @v_shuffle_v2p3_v8p3__3_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7524,6 +7564,7 @@ define void @v_shuffle_v2p3_v8p3__5_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v5
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7536,6 +7577,7 @@ define void @v_shuffle_v2p3_v8p3__5_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -7593,6 +7635,7 @@ define void @v_shuffle_v2p3_v8p3__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7605,6 +7648,7 @@ define void @v_shuffle_v2p3_v8p3__7_8(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -7617,6 +7661,7 @@ define void @v_shuffle_v2p3_v8p3__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -8681,6 +8726,7 @@ define void @v_shuffle_v2p3_v8p3__u_10(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -8693,6 +8739,7 @@ define void @v_shuffle_v2p3_v8p3__u_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -10213,6 +10260,7 @@ define void @v_shuffle_v2p3_v8p3__u_12(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -10225,6 +10273,7 @@ define void @v_shuffle_v2p3_v8p3__u_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -11745,6 +11794,7 @@ define void @v_shuffle_v2p3_v8p3__u_14(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
@@ -11757,6 +11807,7 @@ define void @v_shuffle_v2p3_v8p3__u_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1]
@@ -13322,6 +13373,7 @@ define void @s_shuffle_v2p3_v8p3__1_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13334,6 +13386,7 @@ define void @s_shuffle_v2p3_v8p3__1_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13342,6 +13395,7 @@ define void @s_shuffle_v2p3_v8p3__1_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13406,6 +13460,7 @@ define void @s_shuffle_v2p3_v8p3__3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13418,6 +13473,7 @@ define void @s_shuffle_v2p3_v8p3__3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13426,6 +13482,7 @@ define void @s_shuffle_v2p3_v8p3__3_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13486,9 +13543,10 @@ define void @s_shuffle_v2p3_v8p3__5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13498,9 +13556,10 @@ define void @s_shuffle_v2p3_v8p3__5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13508,6 +13567,7 @@ define void @s_shuffle_v2p3_v8p3__5_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13572,6 +13632,7 @@ define void @s_shuffle_v2p3_v8p3__7_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13584,6 +13645,7 @@ define void @s_shuffle_v2p3_v8p3__7_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13592,6 +13654,7 @@ define void @s_shuffle_v2p3_v8p3__7_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13628,6 +13691,7 @@ define void @s_shuffle_v2p3_v8p3__9_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13640,6 +13704,7 @@ define void @s_shuffle_v2p3_v8p3__9_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13648,6 +13713,7 @@ define void @s_shuffle_v2p3_v8p3__9_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13714,6 +13780,7 @@ define void @s_shuffle_v2p3_v8p3__11_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13726,6 +13793,7 @@ define void @s_shuffle_v2p3_v8p3__11_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13734,6 +13802,7 @@ define void @s_shuffle_v2p3_v8p3__11_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13796,9 +13865,10 @@ define void @s_shuffle_v2p3_v8p3__13_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -13808,9 +13878,10 @@ define void @s_shuffle_v2p3_v8p3__13_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -13818,6 +13889,7 @@ define void @s_shuffle_v2p3_v8p3__13_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13884,6 +13956,7 @@ define void @s_shuffle_v2p3_v8p3__15_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -13896,6 +13969,7 @@ define void @s_shuffle_v2p3_v8p3__15_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -13904,6 +13978,7 @@ define void @s_shuffle_v2p3_v8p3__15_u() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14679,6 +14754,7 @@ define void @s_shuffle_v2p3_v8p3__u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -14691,6 +14767,7 @@ define void @s_shuffle_v2p3_v8p3__u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -14699,6 +14776,7 @@ define void @s_shuffle_v2p3_v8p3__u_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15027,6 +15105,7 @@ define void @s_shuffle_v2p3_v8p3__8_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -15039,6 +15118,7 @@ define void @s_shuffle_v2p3_v8p3__8_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -15047,6 +15127,7 @@ define void @s_shuffle_v2p3_v8p3__8_0() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16023,6 +16104,7 @@ define void @s_shuffle_v2p3_v8p3__u_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16035,6 +16117,7 @@ define void @s_shuffle_v2p3_v8p3__u_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16043,6 +16126,7 @@ define void @s_shuffle_v2p3_v8p3__u_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -16371,6 +16455,7 @@ define void @s_shuffle_v2p3_v8p3__8_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -16383,6 +16468,7 @@ define void @s_shuffle_v2p3_v8p3__8_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -16391,6 +16477,7 @@ define void @s_shuffle_v2p3_v8p3__8_2() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17464,9 +17551,10 @@ define void @s_shuffle_v2p3_v8p3__u_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17476,9 +17564,10 @@ define void @s_shuffle_v2p3_v8p3__u_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17486,6 +17575,7 @@ define void @s_shuffle_v2p3_v8p3__u_4() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -17812,9 +17902,10 @@ define void @s_shuffle_v2p3_v8p3__8_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -17824,9 +17915,10 @@ define void @s_shuffle_v2p3_v8p3__8_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -17834,6 +17926,7 @@ define void @s_shuffle_v2p3_v8p3__8_4() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -18806,6 +18899,7 @@ define void @s_shuffle_v2p3_v8p3__u_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -18818,6 +18912,7 @@ define void @s_shuffle_v2p3_v8p3__u_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -18826,6 +18921,7 @@ define void @s_shuffle_v2p3_v8p3__u_6() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -19154,6 +19250,7 @@ define void @s_shuffle_v2p3_v8p3__8_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -19166,6 +19263,7 @@ define void @s_shuffle_v2p3_v8p3__8_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -19174,6 +19272,7 @@ define void @s_shuffle_v2p3_v8p3__8_6() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20293,6 +20392,7 @@ define void @s_shuffle_v2p3_v8p3__1_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20305,6 +20405,7 @@ define void @s_shuffle_v2p3_v8p3__1_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20313,6 +20414,7 @@ define void @s_shuffle_v2p3_v8p3__1_8() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20377,6 +20479,7 @@ define void @s_shuffle_v2p3_v8p3__3_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20389,6 +20492,7 @@ define void @s_shuffle_v2p3_v8p3__3_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20397,6 +20501,7 @@ define void @s_shuffle_v2p3_v8p3__3_8() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20457,9 +20562,10 @@ define void @s_shuffle_v2p3_v8p3__5_8() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s9
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s8, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -20469,9 +20575,10 @@ define void @s_shuffle_v2p3_v8p3__5_8() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s9
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s8, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -20479,6 +20586,7 @@ define void @s_shuffle_v2p3_v8p3__5_8() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20543,6 +20651,7 @@ define void @s_shuffle_v2p3_v8p3__7_8() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s8, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -20555,6 +20664,7 @@ define void @s_shuffle_v2p3_v8p3__7_8() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s8, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -20563,6 +20673,7 @@ define void @s_shuffle_v2p3_v8p3__7_8() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_8:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -21522,6 +21633,7 @@ define void @s_shuffle_v2p3_v8p3__u_10() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -21534,6 +21646,7 @@ define void @s_shuffle_v2p3_v8p3__u_10() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -21542,6 +21655,7 @@ define void @s_shuffle_v2p3_v8p3__u_10() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_10:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -23015,9 +23129,10 @@ define void @s_shuffle_v2p3_v8p3__u_12() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s9, s8
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX900-NEXT: s_mov_b32 s9, s12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
@@ -23027,9 +23142,10 @@ define void @s_shuffle_v2p3_v8p3__u_12() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s9, s8
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
@@ -23037,6 +23153,7 @@ define void @s_shuffle_v2p3_v8p3__u_12() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_12:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -24442,6 +24559,7 @@ define void @s_shuffle_v2p3_v8p3__u_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_mov_b32 s9, s10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
@@ -24454,6 +24572,7 @@ define void @s_shuffle_v2p3_v8p3__u_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_mov_b32 s9, s10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
@@ -24462,6 +24581,7 @@ define void @s_shuffle_v2p3_v8p3__u_14() {
;
; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_14:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll
index cd4dbe93e8a11..88d9517c34e1d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll
@@ -4857,6 +4857,7 @@ define void @s_shuffle_v3bf16_v3bf16__1_u_u() {
define void @s_shuffle_v3bf16_v3bf16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4869,6 +4870,7 @@ define void @s_shuffle_v3bf16_v3bf16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4881,6 +4883,7 @@ define void @s_shuffle_v3bf16_v3bf16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4963,6 +4966,7 @@ define void @s_shuffle_v3bf16_v3bf16__4_u_u() {
define void @s_shuffle_v3bf16_v3bf16__5_u_u() {
; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4975,6 +4979,7 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4987,6 +4992,7 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7451,6 +7457,7 @@ define void @s_shuffle_v3bf16_v3bf16__1_3_3() {
define void @s_shuffle_v3bf16_v3bf16__2_3_3() {
; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7463,6 +7470,7 @@ define void @s_shuffle_v3bf16_v3bf16__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7475,6 +7483,7 @@ define void @s_shuffle_v3bf16_v3bf16__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll
index 311ca98227da3..246f5ead02f22 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll
@@ -7999,6 +7999,7 @@ define void @s_shuffle_v3bf16_v4bf16__1_u_u() {
define void @s_shuffle_v3bf16_v4bf16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -8011,6 +8012,7 @@ define void @s_shuffle_v3bf16_v4bf16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -8023,6 +8025,7 @@ define void @s_shuffle_v3bf16_v4bf16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -8144,6 +8147,7 @@ define void @s_shuffle_v3bf16_v4bf16__5_u_u() {
define void @s_shuffle_v3bf16_v4bf16__6_u_u() {
; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -8156,6 +8160,7 @@ define void @s_shuffle_v3bf16_v4bf16__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -8168,6 +8173,7 @@ define void @s_shuffle_v3bf16_v4bf16__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12442,6 +12448,7 @@ define void @s_shuffle_v3bf16_v4bf16__1_4_4() {
define void @s_shuffle_v3bf16_v4bf16__2_4_4() {
; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12454,6 +12461,7 @@ define void @s_shuffle_v3bf16_v4bf16__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12466,6 +12474,7 @@ define void @s_shuffle_v3bf16_v4bf16__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll
index 0854ff2ebfc5d..94453d53c9843 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll
@@ -4857,6 +4857,7 @@ define void @s_shuffle_v3f16_v3f16__1_u_u() {
define void @s_shuffle_v3f16_v3f16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3f16_v3f16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4869,6 +4870,7 @@ define void @s_shuffle_v3f16_v3f16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v3f16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4881,6 +4883,7 @@ define void @s_shuffle_v3f16_v3f16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4963,6 +4966,7 @@ define void @s_shuffle_v3f16_v3f16__4_u_u() {
define void @s_shuffle_v3f16_v3f16__5_u_u() {
; GFX900-LABEL: s_shuffle_v3f16_v3f16__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4975,6 +4979,7 @@ define void @s_shuffle_v3f16_v3f16__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v3f16__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4987,6 +4992,7 @@ define void @s_shuffle_v3f16_v3f16__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7451,6 +7457,7 @@ define void @s_shuffle_v3f16_v3f16__1_3_3() {
define void @s_shuffle_v3f16_v3f16__2_3_3() {
; GFX900-LABEL: s_shuffle_v3f16_v3f16__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7463,6 +7470,7 @@ define void @s_shuffle_v3f16_v3f16__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v3f16__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7475,6 +7483,7 @@ define void @s_shuffle_v3f16_v3f16__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll
index ecc7ff618932b..3907643c47561 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll
@@ -7999,6 +7999,7 @@ define void @s_shuffle_v3f16_v4f16__1_u_u() {
define void @s_shuffle_v3f16_v4f16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -8011,6 +8012,7 @@ define void @s_shuffle_v3f16_v4f16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -8023,6 +8025,7 @@ define void @s_shuffle_v3f16_v4f16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -8144,6 +8147,7 @@ define void @s_shuffle_v3f16_v4f16__5_u_u() {
define void @s_shuffle_v3f16_v4f16__6_u_u() {
; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -8156,6 +8160,7 @@ define void @s_shuffle_v3f16_v4f16__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -8168,6 +8173,7 @@ define void @s_shuffle_v3f16_v4f16__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12442,6 +12448,7 @@ define void @s_shuffle_v3f16_v4f16__1_4_4() {
define void @s_shuffle_v3f16_v4f16__2_4_4() {
; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12454,6 +12461,7 @@ define void @s_shuffle_v3f16_v4f16__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12466,6 +12474,7 @@ define void @s_shuffle_v3f16_v4f16__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
index 430f64164d24f..264963bf3443c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
@@ -59,11 +59,12 @@ define void @v_shuffle_v3f32_v2f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -71,11 +72,12 @@ define void @v_shuffle_v3f32_v2f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -83,11 +85,12 @@ define void @v_shuffle_v3f32_v2f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -112,11 +115,12 @@ define void @v_shuffle_v3f32_v2f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -124,11 +128,12 @@ define void @v_shuffle_v3f32_v2f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -136,11 +141,12 @@ define void @v_shuffle_v3f32_v2f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -170,15 +176,16 @@ define void @v_shuffle_v3f32_v2f32__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -186,15 +193,16 @@ define void @v_shuffle_v3f32_v2f32__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -272,28 +280,30 @@ define void @v_shuffle_v3f32_v2f32__3_2_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3f32_v2f32__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -560,26 +570,29 @@ define void @v_shuffle_v3f32_v2f32__u_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3f32_v2f32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v2f32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -695,26 +708,29 @@ define void @v_shuffle_v3f32_v2f32__2_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3f32_v2f32__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v2f32__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -784,15 +800,16 @@ define void @v_shuffle_v3f32_v2f32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,14 +817,16 @@ define void @v_shuffle_v3f32_v2f32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -815,14 +834,16 @@ define void @v_shuffle_v3f32_v2f32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -1192,15 +1213,16 @@ define void @v_shuffle_v3f32_v2f32__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1208,15 +1230,16 @@ define void @v_shuffle_v3f32_v2f32__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -1394,11 +1417,12 @@ define void @v_shuffle_v3f32_v2f32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1406,11 +1430,12 @@ define void @v_shuffle_v3f32_v2f32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1418,11 +1443,12 @@ define void @v_shuffle_v3f32_v2f32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -1492,39 +1518,44 @@ define void @v_shuffle_v3f32_v2f32__3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v2f32__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v2f32__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v2f32__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -1857,12 +1888,13 @@ define void @v_shuffle_v3f32_v2f32__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1870,12 +1902,13 @@ define void @v_shuffle_v3f32_v2f32__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -2101,6 +2134,7 @@ define void @s_shuffle_v3f32_v2f32__0_u_u() {
define void @s_shuffle_v3f32_v2f32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2113,6 +2147,7 @@ define void @s_shuffle_v3f32_v2f32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2125,6 +2160,7 @@ define void @s_shuffle_v3f32_v2f32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2157,6 +2193,7 @@ define void @s_shuffle_v3f32_v2f32__2_u_u() {
define void @s_shuffle_v3f32_v2f32__3_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2169,6 +2206,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2181,6 +2219,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2200,6 +2239,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_u() {
define void @s_shuffle_v3f32_v2f32__3_0_u() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2216,6 +2256,7 @@ define void @s_shuffle_v3f32_v2f32__3_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2232,6 +2273,7 @@ define void @s_shuffle_v3f32_v2f32__3_0_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2307,6 +2349,7 @@ define void @s_shuffle_v3f32_v2f32__3_1_u() {
define void @s_shuffle_v3f32_v2f32__3_2_u() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2320,6 +2363,7 @@ define void @s_shuffle_v3f32_v2f32__3_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2333,6 +2377,7 @@ define void @s_shuffle_v3f32_v2f32__3_2_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2551,6 +2596,7 @@ define void @s_shuffle_v3f32_v2f32__3_3_3() {
define void @s_shuffle_v3f32_v2f32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2564,6 +2610,7 @@ define void @s_shuffle_v3f32_v2f32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2577,6 +2624,7 @@ define void @s_shuffle_v3f32_v2f32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2663,6 +2711,7 @@ define void @s_shuffle_v3f32_v2f32__1_0_0() {
define void @s_shuffle_v3f32_v2f32__2_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__2_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2676,6 +2725,7 @@ define void @s_shuffle_v3f32_v2f32__2_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2689,6 +2739,7 @@ define void @s_shuffle_v3f32_v2f32__2_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2766,6 +2817,7 @@ define void @s_shuffle_v3f32_v2f32__3_0_0() {
define void @s_shuffle_v3f32_v2f32__3_u_0() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2782,6 +2834,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2798,6 +2851,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3065,6 +3119,7 @@ define void @s_shuffle_v3f32_v2f32__3_1_1() {
define void @s_shuffle_v3f32_v2f32__3_u_1() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3081,6 +3136,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3097,6 +3153,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_1() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3290,6 +3347,7 @@ define void @s_shuffle_v3f32_v2f32__0_2_2() {
define void @s_shuffle_v3f32_v2f32__1_2_2() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3302,6 +3360,7 @@ define void @s_shuffle_v3f32_v2f32__1_2_2() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3314,6 +3373,7 @@ define void @s_shuffle_v3f32_v2f32__1_2_2() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3395,6 +3455,7 @@ define void @s_shuffle_v3f32_v2f32__3_2_2() {
define void @s_shuffle_v3f32_v2f32__3_u_2() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3408,6 +3469,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3421,6 +3483,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_2() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3702,6 +3765,7 @@ define void @s_shuffle_v3f32_v2f32__2_3_3() {
define void @s_shuffle_v3f32_v2f32__3_u_3() {
; GFX900-LABEL: s_shuffle_v3f32_v2f32__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3715,6 +3779,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v2f32__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3728,6 +3793,7 @@ define void @s_shuffle_v3f32_v2f32__3_u_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
index ef670e963bdb6..7cc913cf5fcf6 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v3f32_v3f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v3f32_v3f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v3f32_v3f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -99,37 +102,41 @@ define void @v_shuffle_v3f32_v3f32__1_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -156,9 +163,10 @@ define void @v_shuffle_v3f32_v3f32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -168,9 +176,10 @@ define void @v_shuffle_v3f32_v3f32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -180,9 +189,10 @@ define void @v_shuffle_v3f32_v3f32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -195,37 +205,41 @@ define void @v_shuffle_v3f32_v3f32__4_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -242,13 +256,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -258,13 +273,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -274,14 +290,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -347,49 +363,53 @@ define void @v_shuffle_v3f32_v3f32__5_1_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx3 v9, v[6:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[8:10], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[8:10], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -402,39 +422,44 @@ define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -490,40 +515,44 @@ define void @v_shuffle_v3f32_v3f32__5_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -859,10 +888,11 @@ define void @v_shuffle_v3f32_v3f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,10 +902,11 @@ define void @v_shuffle_v3f32_v3f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -885,10 +916,11 @@ define void @v_shuffle_v3f32_v3f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1043,10 +1075,11 @@ define void @v_shuffle_v3f32_v3f32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1056,10 +1089,11 @@ define void @v_shuffle_v3f32_v3f32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1069,10 +1103,11 @@ define void @v_shuffle_v3f32_v3f32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1206,13 +1241,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,13 +1258,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1238,14 +1275,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1818,13 +1855,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,13 +1872,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1850,14 +1889,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2754,9 +2793,10 @@ define void @v_shuffle_v3f32_v3f32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2766,9 +2806,10 @@ define void @v_shuffle_v3f32_v3f32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2778,9 +2819,10 @@ define void @v_shuffle_v3f32_v3f32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2792,37 +2834,41 @@ define void @v_shuffle_v3f32_v3f32__1_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2941,39 +2987,44 @@ define void @v_shuffle_v3f32_v3f32__5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3544,40 +3595,44 @@ define void @v_shuffle_v3f32_v3f32__5_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v3f32__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4462,6 +4517,7 @@ define void @s_shuffle_v3f32_v3f32__0_u_u() {
define void @s_shuffle_v3f32_v3f32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4474,6 +4530,7 @@ define void @s_shuffle_v3f32_v3f32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4486,6 +4543,7 @@ define void @s_shuffle_v3f32_v3f32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4504,6 +4562,7 @@ define void @s_shuffle_v3f32_v3f32__1_u_u() {
define void @s_shuffle_v3f32_v3f32__2_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4516,6 +4575,7 @@ define void @s_shuffle_v3f32_v3f32__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4528,6 +4588,7 @@ define void @s_shuffle_v3f32_v3f32__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4560,6 +4621,7 @@ define void @s_shuffle_v3f32_v3f32__3_u_u() {
define void @s_shuffle_v3f32_v3f32__4_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4572,6 +4634,7 @@ define void @s_shuffle_v3f32_v3f32__4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4584,6 +4647,7 @@ define void @s_shuffle_v3f32_v3f32__4_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4603,6 +4667,7 @@ define void @s_shuffle_v3f32_v3f32__4_u_u() {
define void @s_shuffle_v3f32_v3f32__5_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4615,6 +4680,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4627,6 +4693,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4646,14 +4713,15 @@ define void @s_shuffle_v3f32_v3f32__5_u_u() {
define void @s_shuffle_v3f32_v3f32__5_0_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4662,14 +4730,15 @@ define void @s_shuffle_v3f32_v3f32__5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4678,6 +4747,7 @@ define void @s_shuffle_v3f32_v3f32__5_0_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4753,14 +4823,15 @@ define void @s_shuffle_v3f32_v3f32__5_1_u() {
define void @s_shuffle_v3f32_v3f32__5_2_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4769,14 +4840,15 @@ define void @s_shuffle_v3f32_v3f32__5_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4785,6 +4857,7 @@ define void @s_shuffle_v3f32_v3f32__5_2_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4808,6 +4881,7 @@ define void @s_shuffle_v3f32_v3f32__5_2_u() {
define void @s_shuffle_v3f32_v3f32__5_3_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4821,6 +4895,7 @@ define void @s_shuffle_v3f32_v3f32__5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4834,6 +4909,7 @@ define void @s_shuffle_v3f32_v3f32__5_3_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4873,6 +4949,7 @@ define void @s_shuffle_v3f32_v3f32__5_4_u() {
define void @s_shuffle_v3f32_v3f32__5_5_u() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4886,6 +4963,7 @@ define void @s_shuffle_v3f32_v3f32__5_5_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4899,6 +4977,7 @@ define void @s_shuffle_v3f32_v3f32__5_5_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5208,6 +5287,7 @@ define void @s_shuffle_v3f32_v3f32__5_5_5() {
define void @s_shuffle_v3f32_v3f32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5221,6 +5301,7 @@ define void @s_shuffle_v3f32_v3f32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5234,6 +5315,7 @@ define void @s_shuffle_v3f32_v3f32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5368,6 +5450,7 @@ define void @s_shuffle_v3f32_v3f32__2_0_0() {
define void @s_shuffle_v3f32_v3f32__3_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__3_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5381,6 +5464,7 @@ define void @s_shuffle_v3f32_v3f32__3_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__3_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5394,6 +5478,7 @@ define void @s_shuffle_v3f32_v3f32__3_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__3_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5529,14 +5614,15 @@ define void @s_shuffle_v3f32_v3f32__5_0_0() {
define void @s_shuffle_v3f32_v3f32__5_u_0() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -5545,14 +5631,15 @@ define void @s_shuffle_v3f32_v3f32__5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -5561,6 +5648,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6015,14 +6103,15 @@ define void @s_shuffle_v3f32_v3f32__5_1_1() {
define void @s_shuffle_v3f32_v3f32__5_u_1() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -6031,14 +6120,15 @@ define void @s_shuffle_v3f32_v3f32__5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -6047,6 +6137,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_1() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6827,6 +6918,7 @@ define void @s_shuffle_v3f32_v3f32__0_3_3() {
define void @s_shuffle_v3f32_v3f32__1_3_3() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__1_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6839,6 +6931,7 @@ define void @s_shuffle_v3f32_v3f32__1_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__1_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6851,6 +6944,7 @@ define void @s_shuffle_v3f32_v3f32__1_3_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6869,6 +6963,7 @@ define void @s_shuffle_v3f32_v3f32__1_3_3() {
define void @s_shuffle_v3f32_v3f32__2_3_3() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6881,6 +6976,7 @@ define void @s_shuffle_v3f32_v3f32__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6893,6 +6989,7 @@ define void @s_shuffle_v3f32_v3f32__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7023,6 +7120,7 @@ define void @s_shuffle_v3f32_v3f32__5_3_3() {
define void @s_shuffle_v3f32_v3f32__5_u_3() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7036,6 +7134,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7049,6 +7148,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7532,6 +7632,7 @@ define void @s_shuffle_v3f32_v3f32__5_4_4() {
define void @s_shuffle_v3f32_v3f32__5_u_4() {
; GFX900-LABEL: s_shuffle_v3f32_v3f32__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7545,6 +7646,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v3f32__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7558,6 +7660,7 @@ define void @s_shuffle_v3f32_v3f32__5_u_4() {
;
; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
index 50c69de069986..8f3644076079b 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v3f32_v4f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v3f32_v4f32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -102,33 +104,37 @@ define void @v_shuffle_v3f32_v4f32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -144,6 +150,7 @@ define void @v_shuffle_v3f32_v4f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -156,6 +163,7 @@ define void @v_shuffle_v3f32_v4f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -168,6 +176,7 @@ define void @v_shuffle_v3f32_v4f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -208,9 +217,10 @@ define void @v_shuffle_v3f32_v4f32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -220,9 +230,10 @@ define void @v_shuffle_v3f32_v4f32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -239,33 +250,37 @@ define void @v_shuffle_v3f32_v4f32__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -282,6 +297,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -294,6 +310,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -306,6 +323,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -328,6 +346,7 @@ define void @v_shuffle_v3f32_v4f32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v2, v0
@@ -344,6 +363,7 @@ define void @v_shuffle_v3f32_v4f32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
@@ -361,6 +381,7 @@ define void @v_shuffle_v3f32_v4f32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v3, v0
@@ -447,15 +468,16 @@ define void @v_shuffle_v3f32_v4f32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -463,15 +485,16 @@ define void @v_shuffle_v3f32_v4f32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -488,6 +511,7 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
@@ -504,6 +528,7 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -520,6 +545,7 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -539,40 +565,44 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v4f32__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -640,27 +670,30 @@ define void @v_shuffle_v3f32_v4f32__7_6_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -677,6 +710,7 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: v_mov_b32_e32 v1, v3
@@ -690,6 +724,7 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
@@ -703,6 +738,7 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v1, v3
@@ -1141,10 +1177,11 @@ define void @v_shuffle_v3f32_v4f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,10 +1191,11 @@ define void @v_shuffle_v3f32_v4f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1167,10 +1205,11 @@ define void @v_shuffle_v3f32_v4f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1372,10 +1411,11 @@ define void @v_shuffle_v3f32_v4f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1385,10 +1425,11 @@ define void @v_shuffle_v3f32_v4f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1398,10 +1439,11 @@ define void @v_shuffle_v3f32_v4f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1596,6 +1638,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v0
@@ -1612,6 +1655,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
@@ -1629,6 +1673,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -2425,10 +2470,11 @@ define void @v_shuffle_v3f32_v4f32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2441,10 +2487,11 @@ define void @v_shuffle_v3f32_v4f32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2458,10 +2505,11 @@ define void @v_shuffle_v3f32_v4f32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3645,6 +3693,7 @@ define void @v_shuffle_v3f32_v4f32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3658,6 +3707,7 @@ define void @v_shuffle_v3f32_v4f32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -3874,6 +3924,7 @@ define void @v_shuffle_v3f32_v4f32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3887,6 +3938,7 @@ define void @v_shuffle_v3f32_v4f32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -4092,6 +4144,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -4108,6 +4161,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -4529,9 +4583,10 @@ define void @v_shuffle_v3f32_v4f32__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,9 +4596,10 @@ define void @v_shuffle_v3f32_v4f32__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4559,33 +4615,37 @@ define void @v_shuffle_v3f32_v4f32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4601,6 +4661,7 @@ define void @v_shuffle_v3f32_v4f32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4613,6 +4674,7 @@ define void @v_shuffle_v3f32_v4f32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4625,6 +4687,7 @@ define void @v_shuffle_v3f32_v4f32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -4794,40 +4857,44 @@ define void @v_shuffle_v3f32_v4f32__7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v4f32__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5608,40 +5675,44 @@ define void @v_shuffle_v3f32_v4f32__7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3f32_v4f32__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6800,6 +6871,7 @@ define void @v_shuffle_v3f32_v4f32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -6813,6 +6885,7 @@ define void @v_shuffle_v3f32_v4f32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7213,6 +7286,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -7226,6 +7300,7 @@ define void @v_shuffle_v3f32_v4f32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7666,6 +7741,7 @@ define void @s_shuffle_v3f32_v4f32__0_u_u() {
define void @s_shuffle_v3f32_v4f32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7678,6 +7754,7 @@ define void @s_shuffle_v3f32_v4f32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7690,6 +7767,7 @@ define void @s_shuffle_v3f32_v4f32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7708,6 +7786,7 @@ define void @s_shuffle_v3f32_v4f32__1_u_u() {
define void @s_shuffle_v3f32_v4f32__2_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7720,6 +7799,7 @@ define void @s_shuffle_v3f32_v4f32__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7732,6 +7812,7 @@ define void @s_shuffle_v3f32_v4f32__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7750,6 +7831,7 @@ define void @s_shuffle_v3f32_v4f32__2_u_u() {
define void @s_shuffle_v3f32_v4f32__3_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7762,6 +7844,7 @@ define void @s_shuffle_v3f32_v4f32__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7774,6 +7857,7 @@ define void @s_shuffle_v3f32_v4f32__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7806,6 +7890,7 @@ define void @s_shuffle_v3f32_v4f32__4_u_u() {
define void @s_shuffle_v3f32_v4f32__5_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7818,6 +7903,7 @@ define void @s_shuffle_v3f32_v4f32__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7830,6 +7916,7 @@ define void @s_shuffle_v3f32_v4f32__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7849,6 +7936,7 @@ define void @s_shuffle_v3f32_v4f32__5_u_u() {
define void @s_shuffle_v3f32_v4f32__6_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7861,6 +7949,7 @@ define void @s_shuffle_v3f32_v4f32__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7873,6 +7962,7 @@ define void @s_shuffle_v3f32_v4f32__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7892,6 +7982,7 @@ define void @s_shuffle_v3f32_v4f32__6_u_u() {
define void @s_shuffle_v3f32_v4f32__7_u_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7904,6 +7995,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7916,6 +8008,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7939,6 +8032,7 @@ define void @s_shuffle_v3f32_v4f32__7_0_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -7955,6 +8049,7 @@ define void @s_shuffle_v3f32_v4f32__7_0_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -7967,6 +8062,7 @@ define void @s_shuffle_v3f32_v4f32__7_0_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8046,6 +8142,7 @@ define void @s_shuffle_v3f32_v4f32__7_2_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8062,6 +8159,7 @@ define void @s_shuffle_v3f32_v4f32__7_2_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8074,6 +8172,7 @@ define void @s_shuffle_v3f32_v4f32__7_2_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8101,6 +8200,7 @@ define void @s_shuffle_v3f32_v4f32__7_3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8117,6 +8217,7 @@ define void @s_shuffle_v3f32_v4f32__7_3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8129,6 +8230,7 @@ define void @s_shuffle_v3f32_v4f32__7_3_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8152,6 +8254,7 @@ define void @s_shuffle_v3f32_v4f32__7_3_u() {
define void @s_shuffle_v3f32_v4f32__7_4_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8165,6 +8268,7 @@ define void @s_shuffle_v3f32_v4f32__7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8178,6 +8282,7 @@ define void @s_shuffle_v3f32_v4f32__7_4_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8217,6 +8322,7 @@ define void @s_shuffle_v3f32_v4f32__7_5_u() {
define void @s_shuffle_v3f32_v4f32__7_6_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_6_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8230,6 +8336,7 @@ define void @s_shuffle_v3f32_v4f32__7_6_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8243,6 +8350,7 @@ define void @s_shuffle_v3f32_v4f32__7_6_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8263,6 +8371,7 @@ define void @s_shuffle_v3f32_v4f32__7_6_u() {
define void @s_shuffle_v3f32_v4f32__7_7_u() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8276,6 +8385,7 @@ define void @s_shuffle_v3f32_v4f32__7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8289,6 +8399,7 @@ define void @s_shuffle_v3f32_v4f32__7_7_u() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8705,6 +8816,7 @@ define void @s_shuffle_v3f32_v4f32__7_7_7() {
define void @s_shuffle_v3f32_v4f32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8718,6 +8830,7 @@ define void @s_shuffle_v3f32_v4f32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8731,6 +8844,7 @@ define void @s_shuffle_v3f32_v4f32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8913,6 +9027,7 @@ define void @s_shuffle_v3f32_v4f32__3_0_0() {
define void @s_shuffle_v3f32_v4f32__4_0_0() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__4_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8926,6 +9041,7 @@ define void @s_shuffle_v3f32_v4f32__4_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__4_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8939,6 +9055,7 @@ define void @s_shuffle_v3f32_v4f32__4_0_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__4_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9136,6 +9253,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9152,6 +9270,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9164,6 +9283,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_0() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9812,6 +9932,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9828,6 +9949,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9840,6 +9962,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_1() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -10866,6 +10989,7 @@ define void @s_shuffle_v3f32_v4f32__7_6_2() {
define void @s_shuffle_v3f32_v4f32__u_3_3() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -10879,6 +11003,7 @@ define void @s_shuffle_v3f32_v4f32__u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -10892,6 +11017,7 @@ define void @s_shuffle_v3f32_v4f32__u_3_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11074,6 +11200,7 @@ define void @s_shuffle_v3f32_v4f32__3_3_3() {
define void @s_shuffle_v3f32_v4f32__4_3_3() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__4_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11087,6 +11214,7 @@ define void @s_shuffle_v3f32_v4f32__4_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__4_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11100,6 +11228,7 @@ define void @s_shuffle_v3f32_v4f32__4_3_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__4_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11297,6 +11426,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -11313,6 +11443,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -11325,6 +11456,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_3() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11744,6 +11876,7 @@ define void @s_shuffle_v3f32_v4f32__0_4_4() {
define void @s_shuffle_v3f32_v4f32__1_4_4() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__1_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11756,6 +11889,7 @@ define void @s_shuffle_v3f32_v4f32__1_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__1_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11768,6 +11902,7 @@ define void @s_shuffle_v3f32_v4f32__1_4_4() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11786,6 +11921,7 @@ define void @s_shuffle_v3f32_v4f32__1_4_4() {
define void @s_shuffle_v3f32_v4f32__2_4_4() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11798,6 +11934,7 @@ define void @s_shuffle_v3f32_v4f32__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11810,6 +11947,7 @@ define void @s_shuffle_v3f32_v4f32__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11828,6 +11966,7 @@ define void @s_shuffle_v3f32_v4f32__2_4_4() {
define void @s_shuffle_v3f32_v4f32__3_4_4() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__3_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11840,6 +11979,7 @@ define void @s_shuffle_v3f32_v4f32__3_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__3_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11852,6 +11992,7 @@ define void @s_shuffle_v3f32_v4f32__3_4_4() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12031,6 +12172,7 @@ define void @s_shuffle_v3f32_v4f32__7_4_4() {
define void @s_shuffle_v3f32_v4f32__7_u_4() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12044,6 +12186,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12057,6 +12200,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_4() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12722,6 +12866,7 @@ define void @s_shuffle_v3f32_v4f32__7_5_5() {
define void @s_shuffle_v3f32_v4f32__7_u_5() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12735,6 +12880,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12748,6 +12894,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_5() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13720,6 +13867,7 @@ define void @s_shuffle_v3f32_v4f32__7_5_6() {
define void @s_shuffle_v3f32_v4f32__u_7_7() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__u_7_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13733,6 +13881,7 @@ define void @s_shuffle_v3f32_v4f32__u_7_7() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__u_7_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13746,6 +13895,7 @@ define void @s_shuffle_v3f32_v4f32__u_7_7() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_7_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14113,6 +14263,7 @@ define void @s_shuffle_v3f32_v4f32__6_7_7() {
define void @s_shuffle_v3f32_v4f32__7_u_7() {
; GFX900-LABEL: s_shuffle_v3f32_v4f32__7_u_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -14126,6 +14277,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_7() {
;
; GFX90A-LABEL: s_shuffle_v3f32_v4f32__7_u_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -14139,6 +14291,7 @@ define void @s_shuffle_v3f32_v4f32__7_u_7() {
;
; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll
index 0cf6da3659dde..98c90cfac2fe6 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll
@@ -4817,6 +4817,7 @@ define void @s_shuffle_v3i16_v3i16__1_u_u() {
define void @s_shuffle_v3i16_v3i16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4829,6 +4830,7 @@ define void @s_shuffle_v3i16_v3i16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4841,6 +4843,7 @@ define void @s_shuffle_v3i16_v3i16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4923,6 +4926,7 @@ define void @s_shuffle_v3i16_v3i16__4_u_u() {
define void @s_shuffle_v3i16_v3i16__5_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4935,6 +4939,7 @@ define void @s_shuffle_v3i16_v3i16__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4947,6 +4952,7 @@ define void @s_shuffle_v3i16_v3i16__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7387,6 +7393,7 @@ define void @s_shuffle_v3i16_v3i16__1_3_3() {
define void @s_shuffle_v3i16_v3i16__2_3_3() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7399,6 +7406,7 @@ define void @s_shuffle_v3i16_v3i16__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7411,6 +7419,7 @@ define void @s_shuffle_v3i16_v3i16__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll
index 977055e546bba..a8736558b36e9 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll
@@ -7957,6 +7957,7 @@ define void @s_shuffle_v3i16_v4i16__1_u_u() {
define void @s_shuffle_v3i16_v4i16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7969,6 +7970,7 @@ define void @s_shuffle_v3i16_v4i16__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7981,6 +7983,7 @@ define void @s_shuffle_v3i16_v4i16__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -8102,6 +8105,7 @@ define void @s_shuffle_v3i16_v4i16__5_u_u() {
define void @s_shuffle_v3i16_v4i16__6_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -8114,6 +8118,7 @@ define void @s_shuffle_v3i16_v4i16__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -8126,6 +8131,7 @@ define void @s_shuffle_v3i16_v4i16__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12256,6 +12262,7 @@ define void @s_shuffle_v3i16_v4i16__1_4_4() {
define void @s_shuffle_v3i16_v4i16__2_4_4() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12268,6 +12275,7 @@ define void @s_shuffle_v3i16_v4i16__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12280,6 +12288,7 @@ define void @s_shuffle_v3i16_v4i16__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
index ea4fac3b1d2b1..96257966cfc3c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
@@ -59,11 +59,12 @@ define void @v_shuffle_v3i32_v2i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -71,11 +72,12 @@ define void @v_shuffle_v3i32_v2i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -83,11 +85,12 @@ define void @v_shuffle_v3i32_v2i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -112,11 +115,12 @@ define void @v_shuffle_v3i32_v2i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -124,11 +128,12 @@ define void @v_shuffle_v3i32_v2i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -136,11 +141,12 @@ define void @v_shuffle_v3i32_v2i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -170,15 +176,16 @@ define void @v_shuffle_v3i32_v2i32__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -186,15 +193,16 @@ define void @v_shuffle_v3i32_v2i32__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -272,28 +280,30 @@ define void @v_shuffle_v3i32_v2i32__3_2_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3i32_v2i32__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -560,26 +570,29 @@ define void @v_shuffle_v3i32_v2i32__u_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3i32_v2i32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v2i32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -695,26 +708,29 @@ define void @v_shuffle_v3i32_v2i32__2_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3i32_v2i32__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v2i32__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -784,15 +800,16 @@ define void @v_shuffle_v3i32_v2i32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,14 +817,16 @@ define void @v_shuffle_v3i32_v2i32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -815,14 +834,16 @@ define void @v_shuffle_v3i32_v2i32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -1192,15 +1213,16 @@ define void @v_shuffle_v3i32_v2i32__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1208,15 +1230,16 @@ define void @v_shuffle_v3i32_v2i32__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -1394,11 +1417,12 @@ define void @v_shuffle_v3i32_v2i32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1406,11 +1430,12 @@ define void @v_shuffle_v3i32_v2i32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1418,11 +1443,12 @@ define void @v_shuffle_v3i32_v2i32__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -1492,39 +1518,44 @@ define void @v_shuffle_v3i32_v2i32__3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v2i32__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v2i32__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v2i32__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -1857,12 +1888,13 @@ define void @v_shuffle_v3i32_v2i32__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1870,12 +1902,13 @@ define void @v_shuffle_v3i32_v2i32__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -2101,6 +2134,7 @@ define void @s_shuffle_v3i32_v2i32__0_u_u() {
define void @s_shuffle_v3i32_v2i32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2113,6 +2147,7 @@ define void @s_shuffle_v3i32_v2i32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2125,6 +2160,7 @@ define void @s_shuffle_v3i32_v2i32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2157,6 +2193,7 @@ define void @s_shuffle_v3i32_v2i32__2_u_u() {
define void @s_shuffle_v3i32_v2i32__3_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2169,6 +2206,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2181,6 +2219,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2200,6 +2239,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_u() {
define void @s_shuffle_v3i32_v2i32__3_0_u() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2216,6 +2256,7 @@ define void @s_shuffle_v3i32_v2i32__3_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2232,6 +2273,7 @@ define void @s_shuffle_v3i32_v2i32__3_0_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2307,6 +2349,7 @@ define void @s_shuffle_v3i32_v2i32__3_1_u() {
define void @s_shuffle_v3i32_v2i32__3_2_u() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2320,6 +2363,7 @@ define void @s_shuffle_v3i32_v2i32__3_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2333,6 +2377,7 @@ define void @s_shuffle_v3i32_v2i32__3_2_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2551,6 +2596,7 @@ define void @s_shuffle_v3i32_v2i32__3_3_3() {
define void @s_shuffle_v3i32_v2i32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2564,6 +2610,7 @@ define void @s_shuffle_v3i32_v2i32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2577,6 +2624,7 @@ define void @s_shuffle_v3i32_v2i32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2663,6 +2711,7 @@ define void @s_shuffle_v3i32_v2i32__1_0_0() {
define void @s_shuffle_v3i32_v2i32__2_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__2_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2676,6 +2725,7 @@ define void @s_shuffle_v3i32_v2i32__2_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2689,6 +2739,7 @@ define void @s_shuffle_v3i32_v2i32__2_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2766,6 +2817,7 @@ define void @s_shuffle_v3i32_v2i32__3_0_0() {
define void @s_shuffle_v3i32_v2i32__3_u_0() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2782,6 +2834,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2798,6 +2851,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3065,6 +3119,7 @@ define void @s_shuffle_v3i32_v2i32__3_1_1() {
define void @s_shuffle_v3i32_v2i32__3_u_1() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3081,6 +3136,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3097,6 +3153,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_1() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3290,6 +3347,7 @@ define void @s_shuffle_v3i32_v2i32__0_2_2() {
define void @s_shuffle_v3i32_v2i32__1_2_2() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3302,6 +3360,7 @@ define void @s_shuffle_v3i32_v2i32__1_2_2() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3314,6 +3373,7 @@ define void @s_shuffle_v3i32_v2i32__1_2_2() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3395,6 +3455,7 @@ define void @s_shuffle_v3i32_v2i32__3_2_2() {
define void @s_shuffle_v3i32_v2i32__3_u_2() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3408,6 +3469,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3421,6 +3483,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_2() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3702,6 +3765,7 @@ define void @s_shuffle_v3i32_v2i32__2_3_3() {
define void @s_shuffle_v3i32_v2i32__3_u_3() {
; GFX900-LABEL: s_shuffle_v3i32_v2i32__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3715,6 +3779,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v2i32__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3728,6 +3793,7 @@ define void @s_shuffle_v3i32_v2i32__3_u_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
index 7061c13b28d03..a8a93e4d22aa8 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v3i32_v3i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v3i32_v3i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v3i32_v3i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -99,37 +102,41 @@ define void @v_shuffle_v3i32_v3i32__1_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -156,9 +163,10 @@ define void @v_shuffle_v3i32_v3i32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -168,9 +176,10 @@ define void @v_shuffle_v3i32_v3i32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -180,9 +189,10 @@ define void @v_shuffle_v3i32_v3i32__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -195,37 +205,41 @@ define void @v_shuffle_v3i32_v3i32__4_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -242,13 +256,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -258,13 +273,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -274,14 +290,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -347,49 +363,53 @@ define void @v_shuffle_v3i32_v3i32__5_1_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx3 v9, v[6:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[8:10], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[8:10], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -402,39 +422,44 @@ define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -490,40 +515,44 @@ define void @v_shuffle_v3i32_v3i32__5_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -859,10 +888,11 @@ define void @v_shuffle_v3i32_v3i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,10 +902,11 @@ define void @v_shuffle_v3i32_v3i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -885,10 +916,11 @@ define void @v_shuffle_v3i32_v3i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1043,10 +1075,11 @@ define void @v_shuffle_v3i32_v3i32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1056,10 +1089,11 @@ define void @v_shuffle_v3i32_v3i32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1069,10 +1103,11 @@ define void @v_shuffle_v3i32_v3i32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1206,13 +1241,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,13 +1258,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1238,14 +1275,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1818,13 +1855,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,13 +1872,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1850,14 +1889,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2754,9 +2793,10 @@ define void @v_shuffle_v3i32_v3i32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2766,9 +2806,10 @@ define void @v_shuffle_v3i32_v3i32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2778,9 +2819,10 @@ define void @v_shuffle_v3i32_v3i32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2792,37 +2834,41 @@ define void @v_shuffle_v3i32_v3i32__1_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2941,39 +2987,44 @@ define void @v_shuffle_v3i32_v3i32__5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3544,40 +3595,44 @@ define void @v_shuffle_v3i32_v3i32__5_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v3i32__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4462,6 +4517,7 @@ define void @s_shuffle_v3i32_v3i32__0_u_u() {
define void @s_shuffle_v3i32_v3i32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4474,6 +4530,7 @@ define void @s_shuffle_v3i32_v3i32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4486,6 +4543,7 @@ define void @s_shuffle_v3i32_v3i32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4504,6 +4562,7 @@ define void @s_shuffle_v3i32_v3i32__1_u_u() {
define void @s_shuffle_v3i32_v3i32__2_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4516,6 +4575,7 @@ define void @s_shuffle_v3i32_v3i32__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4528,6 +4588,7 @@ define void @s_shuffle_v3i32_v3i32__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4560,6 +4621,7 @@ define void @s_shuffle_v3i32_v3i32__3_u_u() {
define void @s_shuffle_v3i32_v3i32__4_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4572,6 +4634,7 @@ define void @s_shuffle_v3i32_v3i32__4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4584,6 +4647,7 @@ define void @s_shuffle_v3i32_v3i32__4_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4603,6 +4667,7 @@ define void @s_shuffle_v3i32_v3i32__4_u_u() {
define void @s_shuffle_v3i32_v3i32__5_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4615,6 +4680,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4627,6 +4693,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4646,14 +4713,15 @@ define void @s_shuffle_v3i32_v3i32__5_u_u() {
define void @s_shuffle_v3i32_v3i32__5_0_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4662,14 +4730,15 @@ define void @s_shuffle_v3i32_v3i32__5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4678,6 +4747,7 @@ define void @s_shuffle_v3i32_v3i32__5_0_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4753,14 +4823,15 @@ define void @s_shuffle_v3i32_v3i32__5_1_u() {
define void @s_shuffle_v3i32_v3i32__5_2_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4769,14 +4840,15 @@ define void @s_shuffle_v3i32_v3i32__5_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4785,6 +4857,7 @@ define void @s_shuffle_v3i32_v3i32__5_2_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4808,6 +4881,7 @@ define void @s_shuffle_v3i32_v3i32__5_2_u() {
define void @s_shuffle_v3i32_v3i32__5_3_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4821,6 +4895,7 @@ define void @s_shuffle_v3i32_v3i32__5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4834,6 +4909,7 @@ define void @s_shuffle_v3i32_v3i32__5_3_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4873,6 +4949,7 @@ define void @s_shuffle_v3i32_v3i32__5_4_u() {
define void @s_shuffle_v3i32_v3i32__5_5_u() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4886,6 +4963,7 @@ define void @s_shuffle_v3i32_v3i32__5_5_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4899,6 +4977,7 @@ define void @s_shuffle_v3i32_v3i32__5_5_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5208,6 +5287,7 @@ define void @s_shuffle_v3i32_v3i32__5_5_5() {
define void @s_shuffle_v3i32_v3i32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5221,6 +5301,7 @@ define void @s_shuffle_v3i32_v3i32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5234,6 +5315,7 @@ define void @s_shuffle_v3i32_v3i32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5368,6 +5450,7 @@ define void @s_shuffle_v3i32_v3i32__2_0_0() {
define void @s_shuffle_v3i32_v3i32__3_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__3_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5381,6 +5464,7 @@ define void @s_shuffle_v3i32_v3i32__3_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__3_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5394,6 +5478,7 @@ define void @s_shuffle_v3i32_v3i32__3_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__3_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5529,14 +5614,15 @@ define void @s_shuffle_v3i32_v3i32__5_0_0() {
define void @s_shuffle_v3i32_v3i32__5_u_0() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -5545,14 +5631,15 @@ define void @s_shuffle_v3i32_v3i32__5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -5561,6 +5648,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6015,14 +6103,15 @@ define void @s_shuffle_v3i32_v3i32__5_1_1() {
define void @s_shuffle_v3i32_v3i32__5_u_1() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -6031,14 +6120,15 @@ define void @s_shuffle_v3i32_v3i32__5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -6047,6 +6137,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_1() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6827,6 +6918,7 @@ define void @s_shuffle_v3i32_v3i32__0_3_3() {
define void @s_shuffle_v3i32_v3i32__1_3_3() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__1_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6839,6 +6931,7 @@ define void @s_shuffle_v3i32_v3i32__1_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__1_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6851,6 +6944,7 @@ define void @s_shuffle_v3i32_v3i32__1_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6869,6 +6963,7 @@ define void @s_shuffle_v3i32_v3i32__1_3_3() {
define void @s_shuffle_v3i32_v3i32__2_3_3() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6881,6 +6976,7 @@ define void @s_shuffle_v3i32_v3i32__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6893,6 +6989,7 @@ define void @s_shuffle_v3i32_v3i32__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7023,6 +7120,7 @@ define void @s_shuffle_v3i32_v3i32__5_3_3() {
define void @s_shuffle_v3i32_v3i32__5_u_3() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7036,6 +7134,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7049,6 +7148,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7532,6 +7632,7 @@ define void @s_shuffle_v3i32_v3i32__5_4_4() {
define void @s_shuffle_v3i32_v3i32__5_u_4() {
; GFX900-LABEL: s_shuffle_v3i32_v3i32__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7545,6 +7646,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v3i32__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7558,6 +7660,7 @@ define void @s_shuffle_v3i32_v3i32__5_u_4() {
;
; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
index 11d1897d0449f..80e32ef57442a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v3i32_v4i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v3i32_v4i32__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -102,33 +104,37 @@ define void @v_shuffle_v3i32_v4i32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -144,6 +150,7 @@ define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -156,6 +163,7 @@ define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -168,6 +176,7 @@ define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -208,9 +217,10 @@ define void @v_shuffle_v3i32_v4i32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -220,9 +230,10 @@ define void @v_shuffle_v3i32_v4i32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -239,33 +250,37 @@ define void @v_shuffle_v3i32_v4i32__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -282,6 +297,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -294,6 +310,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -306,6 +323,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -328,6 +346,7 @@ define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v2, v0
@@ -344,6 +363,7 @@ define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
@@ -361,6 +381,7 @@ define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v3, v0
@@ -447,15 +468,16 @@ define void @v_shuffle_v3i32_v4i32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -463,15 +485,16 @@ define void @v_shuffle_v3i32_v4i32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -488,6 +511,7 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
@@ -504,6 +528,7 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -520,6 +545,7 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -539,40 +565,44 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v4i32__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -640,27 +670,30 @@ define void @v_shuffle_v3i32_v4i32__7_6_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -677,6 +710,7 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: v_mov_b32_e32 v1, v3
@@ -690,6 +724,7 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
@@ -703,6 +738,7 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v1, v3
@@ -1141,10 +1177,11 @@ define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,10 +1191,11 @@ define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1167,10 +1205,11 @@ define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1372,10 +1411,11 @@ define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1385,10 +1425,11 @@ define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1398,10 +1439,11 @@ define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1596,6 +1638,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v0
@@ -1612,6 +1655,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
@@ -1629,6 +1673,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -2425,10 +2470,11 @@ define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2441,10 +2487,11 @@ define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2458,10 +2505,11 @@ define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3645,6 +3693,7 @@ define void @v_shuffle_v3i32_v4i32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3658,6 +3707,7 @@ define void @v_shuffle_v3i32_v4i32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -3874,6 +3924,7 @@ define void @v_shuffle_v3i32_v4i32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3887,6 +3938,7 @@ define void @v_shuffle_v3i32_v4i32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -4092,6 +4144,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -4108,6 +4161,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -4529,9 +4583,10 @@ define void @v_shuffle_v3i32_v4i32__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,9 +4596,10 @@ define void @v_shuffle_v3i32_v4i32__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4559,33 +4615,37 @@ define void @v_shuffle_v3i32_v4i32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4601,6 +4661,7 @@ define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4613,6 +4674,7 @@ define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4625,6 +4687,7 @@ define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -4794,40 +4857,44 @@ define void @v_shuffle_v3i32_v4i32__7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v4i32__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5608,40 +5675,44 @@ define void @v_shuffle_v3i32_v4i32__7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i32_v4i32__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6800,6 +6871,7 @@ define void @v_shuffle_v3i32_v4i32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -6813,6 +6885,7 @@ define void @v_shuffle_v3i32_v4i32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7213,6 +7286,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -7226,6 +7300,7 @@ define void @v_shuffle_v3i32_v4i32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7666,6 +7741,7 @@ define void @s_shuffle_v3i32_v4i32__0_u_u() {
define void @s_shuffle_v3i32_v4i32__1_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7678,6 +7754,7 @@ define void @s_shuffle_v3i32_v4i32__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7690,6 +7767,7 @@ define void @s_shuffle_v3i32_v4i32__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7708,6 +7786,7 @@ define void @s_shuffle_v3i32_v4i32__1_u_u() {
define void @s_shuffle_v3i32_v4i32__2_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7720,6 +7799,7 @@ define void @s_shuffle_v3i32_v4i32__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7732,6 +7812,7 @@ define void @s_shuffle_v3i32_v4i32__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7750,6 +7831,7 @@ define void @s_shuffle_v3i32_v4i32__2_u_u() {
define void @s_shuffle_v3i32_v4i32__3_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7762,6 +7844,7 @@ define void @s_shuffle_v3i32_v4i32__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7774,6 +7857,7 @@ define void @s_shuffle_v3i32_v4i32__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7806,6 +7890,7 @@ define void @s_shuffle_v3i32_v4i32__4_u_u() {
define void @s_shuffle_v3i32_v4i32__5_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7818,6 +7903,7 @@ define void @s_shuffle_v3i32_v4i32__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7830,6 +7916,7 @@ define void @s_shuffle_v3i32_v4i32__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7849,6 +7936,7 @@ define void @s_shuffle_v3i32_v4i32__5_u_u() {
define void @s_shuffle_v3i32_v4i32__6_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7861,6 +7949,7 @@ define void @s_shuffle_v3i32_v4i32__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7873,6 +7962,7 @@ define void @s_shuffle_v3i32_v4i32__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7892,6 +7982,7 @@ define void @s_shuffle_v3i32_v4i32__6_u_u() {
define void @s_shuffle_v3i32_v4i32__7_u_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7904,6 +7995,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7916,6 +8008,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7939,6 +8032,7 @@ define void @s_shuffle_v3i32_v4i32__7_0_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -7955,6 +8049,7 @@ define void @s_shuffle_v3i32_v4i32__7_0_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -7967,6 +8062,7 @@ define void @s_shuffle_v3i32_v4i32__7_0_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8046,6 +8142,7 @@ define void @s_shuffle_v3i32_v4i32__7_2_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8062,6 +8159,7 @@ define void @s_shuffle_v3i32_v4i32__7_2_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8074,6 +8172,7 @@ define void @s_shuffle_v3i32_v4i32__7_2_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8101,6 +8200,7 @@ define void @s_shuffle_v3i32_v4i32__7_3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8117,6 +8217,7 @@ define void @s_shuffle_v3i32_v4i32__7_3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8129,6 +8230,7 @@ define void @s_shuffle_v3i32_v4i32__7_3_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8152,6 +8254,7 @@ define void @s_shuffle_v3i32_v4i32__7_3_u() {
define void @s_shuffle_v3i32_v4i32__7_4_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8165,6 +8268,7 @@ define void @s_shuffle_v3i32_v4i32__7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8178,6 +8282,7 @@ define void @s_shuffle_v3i32_v4i32__7_4_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8217,6 +8322,7 @@ define void @s_shuffle_v3i32_v4i32__7_5_u() {
define void @s_shuffle_v3i32_v4i32__7_6_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8230,6 +8336,7 @@ define void @s_shuffle_v3i32_v4i32__7_6_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8243,6 +8350,7 @@ define void @s_shuffle_v3i32_v4i32__7_6_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8263,6 +8371,7 @@ define void @s_shuffle_v3i32_v4i32__7_6_u() {
define void @s_shuffle_v3i32_v4i32__7_7_u() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8276,6 +8385,7 @@ define void @s_shuffle_v3i32_v4i32__7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8289,6 +8399,7 @@ define void @s_shuffle_v3i32_v4i32__7_7_u() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8705,6 +8816,7 @@ define void @s_shuffle_v3i32_v4i32__7_7_7() {
define void @s_shuffle_v3i32_v4i32__u_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8718,6 +8830,7 @@ define void @s_shuffle_v3i32_v4i32__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8731,6 +8844,7 @@ define void @s_shuffle_v3i32_v4i32__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8913,6 +9027,7 @@ define void @s_shuffle_v3i32_v4i32__3_0_0() {
define void @s_shuffle_v3i32_v4i32__4_0_0() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__4_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8926,6 +9041,7 @@ define void @s_shuffle_v3i32_v4i32__4_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__4_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8939,6 +9055,7 @@ define void @s_shuffle_v3i32_v4i32__4_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__4_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9136,6 +9253,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9152,6 +9270,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9164,6 +9283,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_0() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9812,6 +9932,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9828,6 +9949,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9840,6 +9962,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_1() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -10866,6 +10989,7 @@ define void @s_shuffle_v3i32_v4i32__7_6_2() {
define void @s_shuffle_v3i32_v4i32__u_3_3() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -10879,6 +11003,7 @@ define void @s_shuffle_v3i32_v4i32__u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -10892,6 +11017,7 @@ define void @s_shuffle_v3i32_v4i32__u_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11074,6 +11200,7 @@ define void @s_shuffle_v3i32_v4i32__3_3_3() {
define void @s_shuffle_v3i32_v4i32__4_3_3() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__4_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11087,6 +11214,7 @@ define void @s_shuffle_v3i32_v4i32__4_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__4_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11100,6 +11228,7 @@ define void @s_shuffle_v3i32_v4i32__4_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__4_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11297,6 +11426,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -11313,6 +11443,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -11325,6 +11456,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_3() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11744,6 +11876,7 @@ define void @s_shuffle_v3i32_v4i32__0_4_4() {
define void @s_shuffle_v3i32_v4i32__1_4_4() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11756,6 +11889,7 @@ define void @s_shuffle_v3i32_v4i32__1_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11768,6 +11902,7 @@ define void @s_shuffle_v3i32_v4i32__1_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11786,6 +11921,7 @@ define void @s_shuffle_v3i32_v4i32__1_4_4() {
define void @s_shuffle_v3i32_v4i32__2_4_4() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11798,6 +11934,7 @@ define void @s_shuffle_v3i32_v4i32__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11810,6 +11947,7 @@ define void @s_shuffle_v3i32_v4i32__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11828,6 +11966,7 @@ define void @s_shuffle_v3i32_v4i32__2_4_4() {
define void @s_shuffle_v3i32_v4i32__3_4_4() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11840,6 +11979,7 @@ define void @s_shuffle_v3i32_v4i32__3_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11852,6 +11992,7 @@ define void @s_shuffle_v3i32_v4i32__3_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12031,6 +12172,7 @@ define void @s_shuffle_v3i32_v4i32__7_4_4() {
define void @s_shuffle_v3i32_v4i32__7_u_4() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12044,6 +12186,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12057,6 +12200,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_4() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12722,6 +12866,7 @@ define void @s_shuffle_v3i32_v4i32__7_5_5() {
define void @s_shuffle_v3i32_v4i32__7_u_5() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12735,6 +12880,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12748,6 +12894,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_5() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13720,6 +13867,7 @@ define void @s_shuffle_v3i32_v4i32__7_5_6() {
define void @s_shuffle_v3i32_v4i32__u_7_7() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_7_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13733,6 +13881,7 @@ define void @s_shuffle_v3i32_v4i32__u_7_7() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_7_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13746,6 +13895,7 @@ define void @s_shuffle_v3i32_v4i32__u_7_7() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_7_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14113,6 +14263,7 @@ define void @s_shuffle_v3i32_v4i32__6_7_7() {
define void @s_shuffle_v3i32_v4i32__7_u_7() {
; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -14126,6 +14277,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_7() {
;
; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -14139,6 +14291,7 @@ define void @s_shuffle_v3i32_v4i32__7_u_7() {
;
; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
index a15fc3212f474..c607bc7dc3960 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v3i64_v2i64__0_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i64_v2i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v3i64_v2i64__2_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i64_v2i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -588,11 +596,12 @@ define void @v_shuffle_v3i64_v2i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -602,11 +611,12 @@ define void @v_shuffle_v3i64_v2i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -616,11 +626,12 @@ define void @v_shuffle_v3i64_v2i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -732,11 +743,12 @@ define void @v_shuffle_v3i64_v2i64__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -746,11 +758,12 @@ define void @v_shuffle_v3i64_v2i64__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -760,11 +773,12 @@ define void @v_shuffle_v3i64_v2i64__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -841,14 +855,15 @@ define void @v_shuffle_v3i64_v2i64__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -858,14 +873,15 @@ define void @v_shuffle_v3i64_v2i64__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -875,15 +891,15 @@ define void @v_shuffle_v3i64_v2i64__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -1249,16 +1265,17 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1266,16 +1283,17 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1283,14 +1301,16 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -1484,40 +1504,44 @@ define void @v_shuffle_v3i64_v2i64__0_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i64_v2i64__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1589,43 +1613,46 @@ define void @v_shuffle_v3i64_v2i64__3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i64_v2i64__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1962,43 +1989,46 @@ define void @v_shuffle_v3i64_v2i64__2_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3i64_v2i64__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2236,6 +2266,7 @@ define void @s_shuffle_v3i64_v2i64__0_u_u() {
define void @s_shuffle_v3i64_v2i64__1_u_u() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2249,6 +2280,7 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2262,6 +2294,7 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2295,6 +2328,7 @@ define void @s_shuffle_v3i64_v2i64__2_u_u() {
define void @s_shuffle_v3i64_v2i64__3_u_u() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2308,6 +2342,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2321,6 +2356,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2343,13 +2379,14 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2361,13 +2398,14 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2377,6 +2415,7 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2457,6 +2496,7 @@ define void @s_shuffle_v3i64_v2i64__3_1_u() {
define void @s_shuffle_v3i64_v2i64__3_2_u() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2472,6 +2512,7 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2487,6 +2528,7 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2731,6 +2773,7 @@ define void @s_shuffle_v3i64_v2i64__3_3_3() {
define void @s_shuffle_v3i64_v2i64__u_0_0() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2746,6 +2789,7 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2761,6 +2805,7 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2860,6 +2905,7 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() {
define void @s_shuffle_v3i64_v2i64__2_0_0() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2875,6 +2921,7 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2890,6 +2937,7 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2980,13 +3028,14 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2998,13 +3047,14 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -3014,6 +3064,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3314,13 +3365,14 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3332,13 +3384,14 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3348,6 +3401,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3561,6 +3615,7 @@ define void @s_shuffle_v3i64_v2i64__0_2_2() {
define void @s_shuffle_v3i64_v2i64__1_2_2() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3574,6 +3629,7 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3587,6 +3643,7 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3678,6 +3735,7 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() {
define void @s_shuffle_v3i64_v2i64__3_u_2() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3693,6 +3751,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3708,6 +3767,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4016,6 +4076,7 @@ define void @s_shuffle_v3i64_v2i64__2_3_3() {
define void @s_shuffle_v3i64_v2i64__3_u_3() {
; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4031,6 +4092,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4046,6 +4108,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
index f15dd7d2772e5..30197dfd13c5c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
@@ -103,6 +103,7 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -116,6 +117,7 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -129,6 +131,7 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -199,6 +202,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -212,6 +216,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -225,6 +230,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -884,11 +890,12 @@ define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -898,11 +905,12 @@ define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -912,11 +920,12 @@ define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1082,11 +1091,12 @@ define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1096,11 +1106,12 @@ define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1110,11 +1121,12 @@ define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1249,14 +1261,15 @@ define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -1266,14 +1279,15 @@ define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -1283,15 +1297,15 @@ define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -1880,16 +1894,17 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1897,16 +1912,17 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1914,14 +1930,16 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -2524,6 +2542,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
@@ -2541,6 +2560,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
@@ -2558,6 +2578,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
@@ -2918,6 +2939,7 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -2931,6 +2953,7 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -2944,6 +2967,7 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -3080,6 +3104,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -3094,6 +3119,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -3108,6 +3134,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -3711,13 +3738,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3725,13 +3753,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3739,13 +3768,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4307,6 +4337,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4321,6 +4352,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4335,6 +4367,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -4694,6 +4727,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -4707,6 +4741,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -4716,6 +4751,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4737,8 +4773,11 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4748,8 +4787,11 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4757,6 +4799,7 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4794,6 +4837,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -4807,6 +4851,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -4816,6 +4861,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4838,8 +4884,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4849,8 +4898,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4858,6 +4910,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4883,10 +4936,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4901,10 +4955,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4916,12 +4971,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -4990,13 +5048,16 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5006,13 +5067,16 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5022,13 +5086,14 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -5045,12 +5110,15 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
define void @s_shuffle_v3i64_v3i64__5_3_u() {
; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5058,12 +5126,15 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5071,6 +5142,7 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5115,12 +5187,13 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5130,12 +5203,13 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5143,6 +5217,7 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5498,6 +5573,7 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -5513,6 +5589,7 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -5524,6 +5601,7 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5680,6 +5758,7 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -5695,6 +5774,7 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -5706,6 +5786,7 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5864,10 +5945,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -5882,10 +5964,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -5897,12 +5980,15 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6390,10 +6476,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -6408,10 +6495,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -6423,12 +6511,15 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -7256,6 +7347,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7269,6 +7361,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7278,6 +7371,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7299,8 +7393,11 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7310,8 +7407,11 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7319,6 +7419,7 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7464,12 +7565,15 @@ define void @s_shuffle_v3i64_v3i64__5_3_3() {
define void @s_shuffle_v3i64_v3i64__5_u_3() {
; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7477,12 +7581,15 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7490,6 +7597,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8014,10 +8122,13 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8027,10 +8138,13 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8038,6 +8152,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() {
;
; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
index 6e156d2d4a2f5..082ce443694a0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
@@ -142,6 +142,7 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -155,6 +156,7 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -168,6 +170,7 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -278,6 +281,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -291,6 +295,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -304,6 +309,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1172,11 +1178,12 @@ define void @v_shuffle_v3i64_v4i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1186,11 +1193,12 @@ define void @v_shuffle_v3i64_v4i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1200,11 +1208,12 @@ define void @v_shuffle_v3i64_v4i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1418,11 +1427,12 @@ define void @v_shuffle_v3i64_v4i64__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1432,11 +1442,12 @@ define void @v_shuffle_v3i64_v4i64__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1446,11 +1457,12 @@ define void @v_shuffle_v3i64_v4i64__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1643,14 +1655,15 @@ define void @v_shuffle_v3i64_v4i64__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
@@ -1660,14 +1673,15 @@ define void @v_shuffle_v3i64_v4i64__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
@@ -1677,15 +1691,15 @@ define void @v_shuffle_v3i64_v4i64__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -2496,16 +2510,17 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2513,16 +2528,17 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2530,14 +2546,16 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
@@ -3363,6 +3381,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
@@ -3380,6 +3399,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
@@ -3397,6 +3417,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
@@ -4225,6 +4246,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
@@ -4242,6 +4264,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
@@ -4259,6 +4282,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
@@ -4774,6 +4798,7 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -4787,6 +4812,7 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -4800,6 +4826,7 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -4985,6 +5012,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -4999,6 +5027,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -5013,6 +5042,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -5830,13 +5860,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -5844,13 +5875,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -5858,13 +5890,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6689,6 +6722,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -6703,6 +6737,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -6717,6 +6752,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7496,6 +7532,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -7510,6 +7547,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -7524,6 +7562,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7990,6 +8029,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8003,6 +8043,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8012,6 +8053,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8033,8 +8075,11 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8044,8 +8089,11 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8053,6 +8101,7 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8074,10 +8123,11 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8087,10 +8137,11 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8098,6 +8149,7 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8135,6 +8187,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8148,6 +8201,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8157,6 +8211,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8179,8 +8234,11 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8190,8 +8248,11 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8199,6 +8260,7 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8221,10 +8283,11 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8234,10 +8297,11 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8245,6 +8309,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8272,6 +8337,7 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
@@ -8290,6 +8356,7 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
@@ -8303,14 +8370,14 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -8384,15 +8451,16 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8402,15 +8470,16 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8422,6 +8491,7 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -8445,13 +8515,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8461,13 +8534,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8479,6 +8555,7 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,14 +8577,15 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
define void @s_shuffle_v3i64_v4i64__7_4_u() {
; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8515,14 +8593,15 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8530,6 +8609,7 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8574,12 +8654,13 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8589,12 +8670,13 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8602,6 +8684,7 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8626,10 +8709,13 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8639,10 +8725,13 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8650,6 +8739,7 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9114,6 +9204,7 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9129,6 +9220,7 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9140,6 +9232,7 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9353,6 +9446,7 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9368,6 +9462,7 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9379,6 +9474,7 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9606,6 +9702,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9624,6 +9721,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9637,14 +9735,15 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -10358,6 +10457,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s6
@@ -10376,6 +10476,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s6
@@ -10389,14 +10490,15 @@ define void @s_shuffle_v3i64_v4i64__7_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -11528,6 +11630,7 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -11543,6 +11646,7 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -11554,6 +11658,7 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -11771,6 +11876,7 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -11786,6 +11892,7 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -11797,6 +11904,7 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12017,15 +12125,16 @@ define void @s_shuffle_v3i64_v4i64__7_u_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12035,15 +12144,16 @@ define void @s_shuffle_v3i64_v4i64__7_u_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12055,6 +12165,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -12524,6 +12635,7 @@ define void @s_shuffle_v3i64_v4i64__1_4_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -12537,6 +12649,7 @@ define void @s_shuffle_v3i64_v4i64__1_4_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -12546,6 +12659,7 @@ define void @s_shuffle_v3i64_v4i64__1_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12567,8 +12681,11 @@ define void @s_shuffle_v3i64_v4i64__2_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12578,8 +12695,11 @@ define void @s_shuffle_v3i64_v4i64__2_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12587,6 +12707,7 @@ define void @s_shuffle_v3i64_v4i64__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12608,10 +12729,11 @@ define void @s_shuffle_v3i64_v4i64__3_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12621,10 +12743,11 @@ define void @s_shuffle_v3i64_v4i64__3_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12632,6 +12755,7 @@ define void @s_shuffle_v3i64_v4i64__3_4_4() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12835,14 +12959,15 @@ define void @s_shuffle_v3i64_v4i64__7_4_4() {
define void @s_shuffle_v3i64_v4i64__7_u_4() {
; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12850,14 +12975,15 @@ define void @s_shuffle_v3i64_v4i64__7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12865,6 +12991,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_4() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13594,12 +13721,13 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -13609,12 +13737,13 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -13622,6 +13751,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14715,6 +14845,7 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -14730,6 +14861,7 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -14741,6 +14873,7 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_7_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15161,12 +15294,13 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -15176,12 +15310,13 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -15189,6 +15324,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() {
;
; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
index fe132493ce536..815a23f273f0d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v3p0_v2p0__0_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p0_v2p0__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v3p0_v2p0__2_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p0_v2p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -588,11 +596,12 @@ define void @v_shuffle_v3p0_v2p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -602,11 +611,12 @@ define void @v_shuffle_v3p0_v2p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -616,11 +626,12 @@ define void @v_shuffle_v3p0_v2p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -732,11 +743,12 @@ define void @v_shuffle_v3p0_v2p0__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -746,11 +758,12 @@ define void @v_shuffle_v3p0_v2p0__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -760,11 +773,12 @@ define void @v_shuffle_v3p0_v2p0__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -841,14 +855,15 @@ define void @v_shuffle_v3p0_v2p0__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -858,14 +873,15 @@ define void @v_shuffle_v3p0_v2p0__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -875,15 +891,15 @@ define void @v_shuffle_v3p0_v2p0__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -1249,16 +1265,17 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1266,16 +1283,17 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1283,14 +1301,16 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -1484,40 +1504,44 @@ define void @v_shuffle_v3p0_v2p0__0_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p0_v2p0__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1589,43 +1613,46 @@ define void @v_shuffle_v3p0_v2p0__3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p0_v2p0__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1962,43 +1989,46 @@ define void @v_shuffle_v3p0_v2p0__2_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p0_v2p0__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2236,6 +2266,7 @@ define void @s_shuffle_v3p0_v2p0__0_u_u() {
define void @s_shuffle_v3p0_v2p0__1_u_u() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2249,6 +2280,7 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2262,6 +2294,7 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2295,6 +2328,7 @@ define void @s_shuffle_v3p0_v2p0__2_u_u() {
define void @s_shuffle_v3p0_v2p0__3_u_u() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2308,6 +2342,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2321,6 +2356,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2343,13 +2379,14 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2361,13 +2398,14 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -2377,6 +2415,7 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2457,6 +2496,7 @@ define void @s_shuffle_v3p0_v2p0__3_1_u() {
define void @s_shuffle_v3p0_v2p0__3_2_u() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2472,6 +2512,7 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2487,6 +2528,7 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2731,6 +2773,7 @@ define void @s_shuffle_v3p0_v2p0__3_3_3() {
define void @s_shuffle_v3p0_v2p0__u_0_0() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2746,6 +2789,7 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2761,6 +2805,7 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2860,6 +2905,7 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() {
define void @s_shuffle_v3p0_v2p0__2_0_0() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -2875,6 +2921,7 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -2890,6 +2937,7 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -2980,13 +3028,14 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -2998,13 +3047,14 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -3014,6 +3064,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3314,13 +3365,14 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -3332,13 +3384,14 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -3348,6 +3401,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3561,6 +3615,7 @@ define void @s_shuffle_v3p0_v2p0__0_2_2() {
define void @s_shuffle_v3p0_v2p0__1_2_2() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3574,6 +3629,7 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3587,6 +3643,7 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3678,6 +3735,7 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() {
define void @s_shuffle_v3p0_v2p0__3_u_2() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3693,6 +3751,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3708,6 +3767,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -4016,6 +4076,7 @@ define void @s_shuffle_v3p0_v2p0__2_3_3() {
define void @s_shuffle_v3p0_v2p0__3_u_3() {
; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -4031,6 +4092,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -4046,6 +4108,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
index b6f4e3091b61f..56cd051df3e55 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
@@ -103,6 +103,7 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -116,6 +117,7 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -129,6 +131,7 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -199,6 +202,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -212,6 +216,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -225,6 +230,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -884,11 +890,12 @@ define void @v_shuffle_v3p0_v3p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -898,11 +905,12 @@ define void @v_shuffle_v3p0_v3p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -912,11 +920,12 @@ define void @v_shuffle_v3p0_v3p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1082,11 +1091,12 @@ define void @v_shuffle_v3p0_v3p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1096,11 +1106,12 @@ define void @v_shuffle_v3p0_v3p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1110,11 +1121,12 @@ define void @v_shuffle_v3p0_v3p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1249,14 +1261,15 @@ define void @v_shuffle_v3p0_v3p0__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -1266,14 +1279,15 @@ define void @v_shuffle_v3p0_v3p0__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -1283,15 +1297,15 @@ define void @v_shuffle_v3p0_v3p0__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -1880,16 +1894,17 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1897,16 +1912,17 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1914,14 +1930,16 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -2524,6 +2542,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
@@ -2541,6 +2560,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
@@ -2558,6 +2578,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
@@ -2918,6 +2939,7 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -2931,6 +2953,7 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -2944,6 +2967,7 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -3080,6 +3104,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -3094,6 +3119,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
@@ -3108,6 +3134,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -3711,13 +3738,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3725,13 +3753,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3739,13 +3768,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4307,6 +4337,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4321,6 +4352,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4335,6 +4367,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -4694,6 +4727,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -4707,6 +4741,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -4716,6 +4751,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4737,8 +4773,11 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4748,8 +4787,11 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4757,6 +4799,7 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4794,6 +4837,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -4807,6 +4851,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -4816,6 +4861,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4838,8 +4884,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4849,8 +4898,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4858,6 +4910,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -4883,10 +4936,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -4901,10 +4955,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4916,12 +4971,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -4990,13 +5048,16 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5006,13 +5067,16 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5022,13 +5086,14 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -5045,12 +5110,15 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
define void @s_shuffle_v3p0_v3p0__5_3_u() {
; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5058,12 +5126,15 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5071,6 +5142,7 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5115,12 +5187,13 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5130,12 +5203,13 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5143,6 +5217,7 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5498,6 +5573,7 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -5513,6 +5589,7 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -5524,6 +5601,7 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5680,6 +5758,7 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -5695,6 +5774,7 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -5706,6 +5786,7 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -5864,10 +5945,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -5882,10 +5964,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -5897,12 +5980,15 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6390,10 +6476,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -6408,10 +6495,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -6423,12 +6511,15 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -7256,6 +7347,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7269,6 +7361,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7278,6 +7371,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7299,8 +7393,11 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7310,8 +7407,11 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7319,6 +7419,7 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7464,12 +7565,15 @@ define void @s_shuffle_v3p0_v3p0__5_3_3() {
define void @s_shuffle_v3p0_v3p0__5_u_3() {
; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7477,12 +7581,15 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7490,6 +7597,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8014,10 +8122,13 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8027,10 +8138,13 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8038,6 +8152,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() {
;
; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
index b03066e66cf66..d979f76e404ad 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
@@ -142,6 +142,7 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -155,6 +156,7 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -168,6 +170,7 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -278,6 +281,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -291,6 +295,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -304,6 +309,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1172,11 +1178,12 @@ define void @v_shuffle_v3p0_v4p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1186,11 +1193,12 @@ define void @v_shuffle_v3p0_v4p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1200,11 +1208,12 @@ define void @v_shuffle_v3p0_v4p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1418,11 +1427,12 @@ define void @v_shuffle_v3p0_v4p0__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1432,11 +1442,12 @@ define void @v_shuffle_v3p0_v4p0__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1446,11 +1457,12 @@ define void @v_shuffle_v3p0_v4p0__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1643,14 +1655,15 @@ define void @v_shuffle_v3p0_v4p0__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
@@ -1660,14 +1673,15 @@ define void @v_shuffle_v3p0_v4p0__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
@@ -1677,15 +1691,15 @@ define void @v_shuffle_v3p0_v4p0__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -2496,16 +2510,17 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2513,16 +2528,17 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2530,14 +2546,16 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
@@ -3363,6 +3381,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
@@ -3380,6 +3399,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
@@ -3397,6 +3417,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
@@ -4225,6 +4246,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
@@ -4242,6 +4264,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
@@ -4259,6 +4282,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
@@ -4774,6 +4798,7 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -4787,6 +4812,7 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -4800,6 +4826,7 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -4985,6 +5012,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -4999,6 +5027,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
@@ -5013,6 +5042,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -5830,13 +5860,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -5844,13 +5875,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -5858,13 +5890,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6689,6 +6722,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -6703,6 +6737,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -6717,6 +6752,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7496,6 +7532,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -7510,6 +7547,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -7524,6 +7562,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7990,6 +8029,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8003,6 +8043,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8012,6 +8053,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8033,8 +8075,11 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8044,8 +8089,11 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8053,6 +8101,7 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8074,10 +8123,11 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8087,10 +8137,11 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8098,6 +8149,7 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8135,6 +8187,7 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8148,6 +8201,7 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8157,6 +8211,7 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8179,8 +8234,11 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8190,8 +8248,11 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8199,6 +8260,7 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8221,10 +8283,11 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8234,10 +8297,11 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8245,6 +8309,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8272,6 +8337,7 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
@@ -8290,6 +8356,7 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
@@ -8303,14 +8370,14 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -8384,15 +8451,16 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8402,15 +8470,16 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8422,6 +8491,7 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -8445,13 +8515,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8461,13 +8534,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8479,6 +8555,7 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,14 +8577,15 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
define void @s_shuffle_v3p0_v4p0__7_4_u() {
; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8515,14 +8593,15 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8530,6 +8609,7 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8574,12 +8654,13 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8589,12 +8670,13 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8602,6 +8684,7 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -8626,10 +8709,13 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8639,10 +8725,13 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8650,6 +8739,7 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9114,6 +9204,7 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9129,6 +9220,7 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9140,6 +9232,7 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9353,6 +9446,7 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9368,6 +9462,7 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9379,6 +9474,7 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -9606,6 +9702,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9624,6 +9721,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9637,14 +9735,15 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -10358,6 +10457,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s6
@@ -10376,6 +10476,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s6
@@ -10389,14 +10490,15 @@ define void @s_shuffle_v3p0_v4p0__7_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -11528,6 +11630,7 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -11543,6 +11646,7 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -11554,6 +11658,7 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -11771,6 +11876,7 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -11786,6 +11892,7 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -11797,6 +11904,7 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12017,15 +12125,16 @@ define void @s_shuffle_v3p0_v4p0__7_u_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12035,15 +12144,16 @@ define void @s_shuffle_v3p0_v4p0__7_u_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12055,6 +12165,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
@@ -12524,6 +12635,7 @@ define void @s_shuffle_v3p0_v4p0__1_4_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -12537,6 +12649,7 @@ define void @s_shuffle_v3p0_v4p0__1_4_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -12546,6 +12659,7 @@ define void @s_shuffle_v3p0_v4p0__1_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12567,8 +12681,11 @@ define void @s_shuffle_v3p0_v4p0__2_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12578,8 +12695,11 @@ define void @s_shuffle_v3p0_v4p0__2_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12587,6 +12707,7 @@ define void @s_shuffle_v3p0_v4p0__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12608,10 +12729,11 @@ define void @s_shuffle_v3p0_v4p0__3_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12621,10 +12743,11 @@ define void @s_shuffle_v3p0_v4p0__3_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12632,6 +12755,7 @@ define void @s_shuffle_v3p0_v4p0__3_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -12835,14 +12959,15 @@ define void @s_shuffle_v3p0_v4p0__7_4_4() {
define void @s_shuffle_v3p0_v4p0__7_u_4() {
; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -12850,14 +12975,15 @@ define void @s_shuffle_v3p0_v4p0__7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -12865,6 +12991,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_4() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13594,12 +13721,13 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -13609,12 +13737,13 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -13622,6 +13751,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14715,6 +14845,7 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
@@ -14730,6 +14861,7 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
@@ -14741,6 +14873,7 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_7_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15161,12 +15294,13 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -15176,12 +15310,13 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -15189,6 +15324,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() {
;
; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
index bd0100a4ffdb5..5ef6b0f8b057e 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
@@ -59,11 +59,12 @@ define void @v_shuffle_v3p3_v2p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -71,11 +72,12 @@ define void @v_shuffle_v3p3_v2p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -83,11 +85,12 @@ define void @v_shuffle_v3p3_v2p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -112,11 +115,12 @@ define void @v_shuffle_v3p3_v2p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -124,11 +128,12 @@ define void @v_shuffle_v3p3_v2p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -136,11 +141,12 @@ define void @v_shuffle_v3p3_v2p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -170,15 +176,16 @@ define void @v_shuffle_v3p3_v2p3__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -186,15 +193,16 @@ define void @v_shuffle_v3p3_v2p3__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -272,28 +280,30 @@ define void @v_shuffle_v3p3_v2p3__3_2_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3p3_v2p3__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -560,26 +570,29 @@ define void @v_shuffle_v3p3_v2p3__u_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3p3_v2p3__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v2p3__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -695,26 +708,29 @@ define void @v_shuffle_v3p3_v2p3__2_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3p3_v2p3__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v2p3__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -784,15 +800,16 @@ define void @v_shuffle_v3p3_v2p3__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,14 +817,16 @@ define void @v_shuffle_v3p3_v2p3__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -815,14 +834,16 @@ define void @v_shuffle_v3p3_v2p3__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1192,15 +1213,16 @@ define void @v_shuffle_v3p3_v2p3__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1208,15 +1230,16 @@ define void @v_shuffle_v3p3_v2p3__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1394,11 +1417,12 @@ define void @v_shuffle_v3p3_v2p3__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1406,11 +1430,12 @@ define void @v_shuffle_v3p3_v2p3__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v2, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1418,11 +1443,12 @@ define void @v_shuffle_v3p3_v2p3__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1492,39 +1518,44 @@ define void @v_shuffle_v3p3_v2p3__3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v2p3__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v2p3__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v2p3__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1857,12 +1888,13 @@ define void @v_shuffle_v3p3_v2p3__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1870,12 +1902,13 @@ define void @v_shuffle_v3p3_v2p3__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[2:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2101,6 +2134,7 @@ define void @s_shuffle_v3p3_v2p3__0_u_u() {
define void @s_shuffle_v3p3_v2p3__1_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2113,6 +2147,7 @@ define void @s_shuffle_v3p3_v2p3__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2125,6 +2160,7 @@ define void @s_shuffle_v3p3_v2p3__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2157,6 +2193,7 @@ define void @s_shuffle_v3p3_v2p3__2_u_u() {
define void @s_shuffle_v3p3_v2p3__3_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2169,6 +2206,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2181,6 +2219,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2200,6 +2239,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_u() {
define void @s_shuffle_v3p3_v2p3__3_0_u() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2216,6 +2256,7 @@ define void @s_shuffle_v3p3_v2p3__3_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2232,6 +2273,7 @@ define void @s_shuffle_v3p3_v2p3__3_0_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2307,6 +2349,7 @@ define void @s_shuffle_v3p3_v2p3__3_1_u() {
define void @s_shuffle_v3p3_v2p3__3_2_u() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2320,6 +2363,7 @@ define void @s_shuffle_v3p3_v2p3__3_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2333,6 +2377,7 @@ define void @s_shuffle_v3p3_v2p3__3_2_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2551,6 +2596,7 @@ define void @s_shuffle_v3p3_v2p3__3_3_3() {
define void @s_shuffle_v3p3_v2p3__u_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2564,6 +2610,7 @@ define void @s_shuffle_v3p3_v2p3__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2577,6 +2624,7 @@ define void @s_shuffle_v3p3_v2p3__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2663,6 +2711,7 @@ define void @s_shuffle_v3p3_v2p3__1_0_0() {
define void @s_shuffle_v3p3_v2p3__2_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__2_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2676,6 +2725,7 @@ define void @s_shuffle_v3p3_v2p3__2_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__2_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2689,6 +2739,7 @@ define void @s_shuffle_v3p3_v2p3__2_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__2_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -2766,6 +2817,7 @@ define void @s_shuffle_v3p3_v2p3__3_0_0() {
define void @s_shuffle_v3p3_v2p3__3_u_0() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -2782,6 +2834,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -2798,6 +2851,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3065,6 +3119,7 @@ define void @s_shuffle_v3p3_v2p3__3_1_1() {
define void @s_shuffle_v3p3_v2p3__3_u_1() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3081,6 +3136,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3097,6 +3153,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_1() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3290,6 +3347,7 @@ define void @s_shuffle_v3p3_v2p3__0_2_2() {
define void @s_shuffle_v3p3_v2p3__1_2_2() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__1_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3302,6 +3360,7 @@ define void @s_shuffle_v3p3_v2p3__1_2_2() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__1_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3314,6 +3373,7 @@ define void @s_shuffle_v3p3_v2p3__1_2_2() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3395,6 +3455,7 @@ define void @s_shuffle_v3p3_v2p3__3_2_2() {
define void @s_shuffle_v3p3_v2p3__3_u_2() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3408,6 +3469,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3421,6 +3483,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_2() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3702,6 +3765,7 @@ define void @s_shuffle_v3p3_v2p3__2_3_3() {
define void @s_shuffle_v3p3_v2p3__3_u_3() {
; GFX900-LABEL: s_shuffle_v3p3_v2p3__3_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3715,6 +3779,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v2p3__3_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3728,6 +3793,7 @@ define void @s_shuffle_v3p3_v2p3__3_u_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
index cecd2a0e4b015..8687438adcef3 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v3p3_v3p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v3p3_v3p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v3p3_v3p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -99,37 +102,41 @@ define void @v_shuffle_v3p3_v3p3__1_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -156,9 +163,10 @@ define void @v_shuffle_v3p3_v3p3__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -168,9 +176,10 @@ define void @v_shuffle_v3p3_v3p3__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -180,9 +189,10 @@ define void @v_shuffle_v3p3_v3p3__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -195,37 +205,41 @@ define void @v_shuffle_v3p3_v3p3__4_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -242,13 +256,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -258,13 +273,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -274,14 +290,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -347,49 +363,53 @@ define void @v_shuffle_v3p3_v3p3__5_1_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx3 v9, v[6:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[8:10], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[8:10], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -402,39 +422,44 @@ define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -490,40 +515,44 @@ define void @v_shuffle_v3p3_v3p3__5_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -859,10 +888,11 @@ define void @v_shuffle_v3p3_v3p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,10 +902,11 @@ define void @v_shuffle_v3p3_v3p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -885,10 +916,11 @@ define void @v_shuffle_v3p3_v3p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1043,10 +1075,11 @@ define void @v_shuffle_v3p3_v3p3__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1056,10 +1089,11 @@ define void @v_shuffle_v3p3_v3p3__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1069,10 +1103,11 @@ define void @v_shuffle_v3p3_v3p3__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1206,13 +1241,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,13 +1258,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1238,14 +1275,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1818,13 +1855,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,13 +1872,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1850,14 +1889,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2754,9 +2793,10 @@ define void @v_shuffle_v3p3_v3p3__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2766,9 +2806,10 @@ define void @v_shuffle_v3p3_v3p3__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2778,9 +2819,10 @@ define void @v_shuffle_v3p3_v3p3__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2792,37 +2834,41 @@ define void @v_shuffle_v3p3_v3p3__1_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2941,39 +2987,44 @@ define void @v_shuffle_v3p3_v3p3__5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3544,40 +3595,44 @@ define void @v_shuffle_v3p3_v3p3__5_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v3p3__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4462,6 +4517,7 @@ define void @s_shuffle_v3p3_v3p3__0_u_u() {
define void @s_shuffle_v3p3_v3p3__1_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4474,6 +4530,7 @@ define void @s_shuffle_v3p3_v3p3__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4486,6 +4543,7 @@ define void @s_shuffle_v3p3_v3p3__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4504,6 +4562,7 @@ define void @s_shuffle_v3p3_v3p3__1_u_u() {
define void @s_shuffle_v3p3_v3p3__2_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4516,6 +4575,7 @@ define void @s_shuffle_v3p3_v3p3__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4528,6 +4588,7 @@ define void @s_shuffle_v3p3_v3p3__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4560,6 +4621,7 @@ define void @s_shuffle_v3p3_v3p3__3_u_u() {
define void @s_shuffle_v3p3_v3p3__4_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4572,6 +4634,7 @@ define void @s_shuffle_v3p3_v3p3__4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4584,6 +4647,7 @@ define void @s_shuffle_v3p3_v3p3__4_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4603,6 +4667,7 @@ define void @s_shuffle_v3p3_v3p3__4_u_u() {
define void @s_shuffle_v3p3_v3p3__5_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4615,6 +4680,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4627,6 +4693,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4646,14 +4713,15 @@ define void @s_shuffle_v3p3_v3p3__5_u_u() {
define void @s_shuffle_v3p3_v3p3__5_0_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4662,14 +4730,15 @@ define void @s_shuffle_v3p3_v3p3__5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4678,6 +4747,7 @@ define void @s_shuffle_v3p3_v3p3__5_0_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4753,14 +4823,15 @@ define void @s_shuffle_v3p3_v3p3__5_1_u() {
define void @s_shuffle_v3p3_v3p3__5_2_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_2_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -4769,14 +4840,15 @@ define void @s_shuffle_v3p3_v3p3__5_2_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_2_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -4785,6 +4857,7 @@ define void @s_shuffle_v3p3_v3p3__5_2_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4808,6 +4881,7 @@ define void @s_shuffle_v3p3_v3p3__5_2_u() {
define void @s_shuffle_v3p3_v3p3__5_3_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4821,6 +4895,7 @@ define void @s_shuffle_v3p3_v3p3__5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4834,6 +4909,7 @@ define void @s_shuffle_v3p3_v3p3__5_3_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -4873,6 +4949,7 @@ define void @s_shuffle_v3p3_v3p3__5_4_u() {
define void @s_shuffle_v3p3_v3p3__5_5_u() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -4886,6 +4963,7 @@ define void @s_shuffle_v3p3_v3p3__5_5_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -4899,6 +4977,7 @@ define void @s_shuffle_v3p3_v3p3__5_5_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5208,6 +5287,7 @@ define void @s_shuffle_v3p3_v3p3__5_5_5() {
define void @s_shuffle_v3p3_v3p3__u_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5221,6 +5301,7 @@ define void @s_shuffle_v3p3_v3p3__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5234,6 +5315,7 @@ define void @s_shuffle_v3p3_v3p3__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5368,6 +5450,7 @@ define void @s_shuffle_v3p3_v3p3__2_0_0() {
define void @s_shuffle_v3p3_v3p3__3_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__3_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -5381,6 +5464,7 @@ define void @s_shuffle_v3p3_v3p3__3_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__3_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -5394,6 +5478,7 @@ define void @s_shuffle_v3p3_v3p3__3_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__3_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -5529,14 +5614,15 @@ define void @s_shuffle_v3p3_v3p3__5_0_0() {
define void @s_shuffle_v3p3_v3p3__5_u_0() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -5545,14 +5631,15 @@ define void @s_shuffle_v3p3_v3p3__5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -5561,6 +5648,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6015,14 +6103,15 @@ define void @s_shuffle_v3p3_v3p3__5_1_1() {
define void @s_shuffle_v3p3_v3p3__5_u_1() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:10]
@@ -6031,14 +6120,15 @@ define void @s_shuffle_v3p3_v3p3__5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:10]
@@ -6047,6 +6137,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_1() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6827,6 +6918,7 @@ define void @s_shuffle_v3p3_v3p3__0_3_3() {
define void @s_shuffle_v3p3_v3p3__1_3_3() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__1_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6839,6 +6931,7 @@ define void @s_shuffle_v3p3_v3p3__1_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__1_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6851,6 +6944,7 @@ define void @s_shuffle_v3p3_v3p3__1_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -6869,6 +6963,7 @@ define void @s_shuffle_v3p3_v3p3__1_3_3() {
define void @s_shuffle_v3p3_v3p3__2_3_3() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__2_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6881,6 +6976,7 @@ define void @s_shuffle_v3p3_v3p3__2_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__2_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6893,6 +6989,7 @@ define void @s_shuffle_v3p3_v3p3__2_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7023,6 +7120,7 @@ define void @s_shuffle_v3p3_v3p3__5_3_3() {
define void @s_shuffle_v3p3_v3p3__5_u_3() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7036,6 +7134,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7049,6 +7148,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7532,6 +7632,7 @@ define void @s_shuffle_v3p3_v3p3__5_4_4() {
define void @s_shuffle_v3p3_v3p3__5_u_4() {
; GFX900-LABEL: s_shuffle_v3p3_v3p3__5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7545,6 +7646,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v3p3__5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7558,6 +7660,7 @@ define void @s_shuffle_v3p3_v3p3__5_u_4() {
;
; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
index 834f03f013ba1..734d7deca4f13 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
@@ -72,9 +72,10 @@ define void @v_shuffle_v3p3_v4p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -84,9 +85,10 @@ define void @v_shuffle_v3p3_v4p3__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -102,33 +104,37 @@ define void @v_shuffle_v3p3_v4p3__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -144,6 +150,7 @@ define void @v_shuffle_v3p3_v4p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -156,6 +163,7 @@ define void @v_shuffle_v3p3_v4p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -168,6 +176,7 @@ define void @v_shuffle_v3p3_v4p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -208,9 +217,10 @@ define void @v_shuffle_v3p3_v4p3__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -220,9 +230,10 @@ define void @v_shuffle_v3p3_v4p3__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -239,33 +250,37 @@ define void @v_shuffle_v3p3_v4p3__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -282,6 +297,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -294,6 +310,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -306,6 +323,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -328,6 +346,7 @@ define void @v_shuffle_v3p3_v4p3__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v2, v0
@@ -344,6 +363,7 @@ define void @v_shuffle_v3p3_v4p3__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
@@ -361,6 +381,7 @@ define void @v_shuffle_v3p3_v4p3__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v3, v0
@@ -447,15 +468,16 @@ define void @v_shuffle_v3p3_v4p3__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -463,15 +485,16 @@ define void @v_shuffle_v3p3_v4p3__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -488,6 +511,7 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
@@ -504,6 +528,7 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -520,6 +545,7 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -539,40 +565,44 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v4p3__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -640,27 +670,30 @@ define void @v_shuffle_v3p3_v4p3__7_6_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -677,6 +710,7 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: v_mov_b32_e32 v1, v3
@@ -690,6 +724,7 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
@@ -703,6 +738,7 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v1, v3
@@ -1141,10 +1177,11 @@ define void @v_shuffle_v3p3_v4p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,10 +1191,11 @@ define void @v_shuffle_v3p3_v4p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1167,10 +1205,11 @@ define void @v_shuffle_v3p3_v4p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1372,10 +1411,11 @@ define void @v_shuffle_v3p3_v4p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1385,10 +1425,11 @@ define void @v_shuffle_v3p3_v4p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1398,10 +1439,11 @@ define void @v_shuffle_v3p3_v4p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1596,6 +1638,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v0
@@ -1612,6 +1655,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
@@ -1629,6 +1673,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -2425,10 +2470,11 @@ define void @v_shuffle_v3p3_v4p3__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2441,10 +2487,11 @@ define void @v_shuffle_v3p3_v4p3__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2458,10 +2505,11 @@ define void @v_shuffle_v3p3_v4p3__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3645,6 +3693,7 @@ define void @v_shuffle_v3p3_v4p3__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3658,6 +3707,7 @@ define void @v_shuffle_v3p3_v4p3__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -3874,6 +3924,7 @@ define void @v_shuffle_v3p3_v4p3__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -3887,6 +3938,7 @@ define void @v_shuffle_v3p3_v4p3__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -4092,6 +4144,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
@@ -4108,6 +4161,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
@@ -4529,9 +4583,10 @@ define void @v_shuffle_v3p3_v4p3__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,9 +4596,10 @@ define void @v_shuffle_v3p3_v4p3__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4559,33 +4615,37 @@ define void @v_shuffle_v3p3_v4p3__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4601,6 +4661,7 @@ define void @v_shuffle_v3p3_v4p3__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4613,6 +4674,7 @@ define void @v_shuffle_v3p3_v4p3__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
@@ -4625,6 +4687,7 @@ define void @v_shuffle_v3p3_v4p3__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
@@ -4794,40 +4857,44 @@ define void @v_shuffle_v3p3_v4p3__7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v4p3__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5608,40 +5675,44 @@ define void @v_shuffle_v3p3_v4p3__7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v3p3_v4p3__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6800,6 +6871,7 @@ define void @v_shuffle_v3p3_v4p3__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -6813,6 +6885,7 @@ define void @v_shuffle_v3p3_v4p3__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7213,6 +7286,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
@@ -7226,6 +7300,7 @@ define void @v_shuffle_v3p3_v4p3__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2
; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v3
; GFX942-NEXT: v_mov_b32_e32 v2, v3
@@ -7666,6 +7741,7 @@ define void @s_shuffle_v3p3_v4p3__0_u_u() {
define void @s_shuffle_v3p3_v4p3__1_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__1_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7678,6 +7754,7 @@ define void @s_shuffle_v3p3_v4p3__1_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__1_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7690,6 +7767,7 @@ define void @s_shuffle_v3p3_v4p3__1_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7708,6 +7786,7 @@ define void @s_shuffle_v3p3_v4p3__1_u_u() {
define void @s_shuffle_v3p3_v4p3__2_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7720,6 +7799,7 @@ define void @s_shuffle_v3p3_v4p3__2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7732,6 +7812,7 @@ define void @s_shuffle_v3p3_v4p3__2_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7750,6 +7831,7 @@ define void @s_shuffle_v3p3_v4p3__2_u_u() {
define void @s_shuffle_v3p3_v4p3__3_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7762,6 +7844,7 @@ define void @s_shuffle_v3p3_v4p3__3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7774,6 +7857,7 @@ define void @s_shuffle_v3p3_v4p3__3_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7806,6 +7890,7 @@ define void @s_shuffle_v3p3_v4p3__4_u_u() {
define void @s_shuffle_v3p3_v4p3__5_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7818,6 +7903,7 @@ define void @s_shuffle_v3p3_v4p3__5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7830,6 +7916,7 @@ define void @s_shuffle_v3p3_v4p3__5_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7849,6 +7936,7 @@ define void @s_shuffle_v3p3_v4p3__5_u_u() {
define void @s_shuffle_v3p3_v4p3__6_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7861,6 +7949,7 @@ define void @s_shuffle_v3p3_v4p3__6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7873,6 +7962,7 @@ define void @s_shuffle_v3p3_v4p3__6_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7892,6 +7982,7 @@ define void @s_shuffle_v3p3_v4p3__6_u_u() {
define void @s_shuffle_v3p3_v4p3__7_u_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -7904,6 +7995,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -7916,6 +8008,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -7939,6 +8032,7 @@ define void @s_shuffle_v3p3_v4p3__7_0_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -7955,6 +8049,7 @@ define void @s_shuffle_v3p3_v4p3__7_0_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -7967,6 +8062,7 @@ define void @s_shuffle_v3p3_v4p3__7_0_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8046,6 +8142,7 @@ define void @s_shuffle_v3p3_v4p3__7_2_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8062,6 +8159,7 @@ define void @s_shuffle_v3p3_v4p3__7_2_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8074,6 +8172,7 @@ define void @s_shuffle_v3p3_v4p3__7_2_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8101,6 +8200,7 @@ define void @s_shuffle_v3p3_v4p3__7_3_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -8117,6 +8217,7 @@ define void @s_shuffle_v3p3_v4p3__7_3_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -8129,6 +8230,7 @@ define void @s_shuffle_v3p3_v4p3__7_3_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8152,6 +8254,7 @@ define void @s_shuffle_v3p3_v4p3__7_3_u() {
define void @s_shuffle_v3p3_v4p3__7_4_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8165,6 +8268,7 @@ define void @s_shuffle_v3p3_v4p3__7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8178,6 +8282,7 @@ define void @s_shuffle_v3p3_v4p3__7_4_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8217,6 +8322,7 @@ define void @s_shuffle_v3p3_v4p3__7_5_u() {
define void @s_shuffle_v3p3_v4p3__7_6_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_6_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8230,6 +8336,7 @@ define void @s_shuffle_v3p3_v4p3__7_6_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_6_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8243,6 +8350,7 @@ define void @s_shuffle_v3p3_v4p3__7_6_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8263,6 +8371,7 @@ define void @s_shuffle_v3p3_v4p3__7_6_u() {
define void @s_shuffle_v3p3_v4p3__7_7_u() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8276,6 +8385,7 @@ define void @s_shuffle_v3p3_v4p3__7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8289,6 +8399,7 @@ define void @s_shuffle_v3p3_v4p3__7_7_u() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8705,6 +8816,7 @@ define void @s_shuffle_v3p3_v4p3__7_7_7() {
define void @s_shuffle_v3p3_v4p3__u_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8718,6 +8830,7 @@ define void @s_shuffle_v3p3_v4p3__u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8731,6 +8844,7 @@ define void @s_shuffle_v3p3_v4p3__u_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -8913,6 +9027,7 @@ define void @s_shuffle_v3p3_v4p3__3_0_0() {
define void @s_shuffle_v3p3_v4p3__4_0_0() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__4_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -8926,6 +9041,7 @@ define void @s_shuffle_v3p3_v4p3__4_0_0() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__4_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -8939,6 +9055,7 @@ define void @s_shuffle_v3p3_v4p3__4_0_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__4_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9136,6 +9253,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9152,6 +9270,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9164,6 +9283,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_0() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -9812,6 +9932,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -9828,6 +9949,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -9840,6 +9962,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_1() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -10866,6 +10989,7 @@ define void @s_shuffle_v3p3_v4p3__7_6_2() {
define void @s_shuffle_v3p3_v4p3__u_3_3() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -10879,6 +11003,7 @@ define void @s_shuffle_v3p3_v4p3__u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -10892,6 +11017,7 @@ define void @s_shuffle_v3p3_v4p3__u_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11074,6 +11200,7 @@ define void @s_shuffle_v3p3_v4p3__3_3_3() {
define void @s_shuffle_v3p3_v4p3__4_3_3() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__4_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11087,6 +11214,7 @@ define void @s_shuffle_v3p3_v4p3__4_3_3() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__4_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11100,6 +11228,7 @@ define void @s_shuffle_v3p3_v4p3__4_3_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__4_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11297,6 +11426,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
@@ -11313,6 +11443,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
@@ -11325,6 +11456,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_3() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11744,6 +11876,7 @@ define void @s_shuffle_v3p3_v4p3__0_4_4() {
define void @s_shuffle_v3p3_v4p3__1_4_4() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__1_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11756,6 +11889,7 @@ define void @s_shuffle_v3p3_v4p3__1_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__1_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11768,6 +11902,7 @@ define void @s_shuffle_v3p3_v4p3__1_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11786,6 +11921,7 @@ define void @s_shuffle_v3p3_v4p3__1_4_4() {
define void @s_shuffle_v3p3_v4p3__2_4_4() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__2_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11798,6 +11934,7 @@ define void @s_shuffle_v3p3_v4p3__2_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__2_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11810,6 +11947,7 @@ define void @s_shuffle_v3p3_v4p3__2_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -11828,6 +11966,7 @@ define void @s_shuffle_v3p3_v4p3__2_4_4() {
define void @s_shuffle_v3p3_v4p3__3_4_4() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__3_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11840,6 +11979,7 @@ define void @s_shuffle_v3p3_v4p3__3_4_4() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__3_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11852,6 +11992,7 @@ define void @s_shuffle_v3p3_v4p3__3_4_4() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12031,6 +12172,7 @@ define void @s_shuffle_v3p3_v4p3__7_4_4() {
define void @s_shuffle_v3p3_v4p3__7_u_4() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12044,6 +12186,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12057,6 +12200,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_4() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12722,6 +12866,7 @@ define void @s_shuffle_v3p3_v4p3__7_5_5() {
define void @s_shuffle_v3p3_v4p3__7_u_5() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12735,6 +12880,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12748,6 +12894,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_5() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13720,6 +13867,7 @@ define void @s_shuffle_v3p3_v4p3__7_5_6() {
define void @s_shuffle_v3p3_v4p3__u_7_7() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__u_7_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13733,6 +13881,7 @@ define void @s_shuffle_v3p3_v4p3__u_7_7() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__u_7_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13746,6 +13895,7 @@ define void @s_shuffle_v3p3_v4p3__u_7_7() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_7_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14113,6 +14263,7 @@ define void @s_shuffle_v3p3_v4p3__6_7_7() {
define void @s_shuffle_v3p3_v4p3__7_u_7() {
; GFX900-LABEL: s_shuffle_v3p3_v4p3__7_u_7:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -14126,6 +14277,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_7() {
;
; GFX90A-LABEL: s_shuffle_v3p3_v4p3__7_u_7:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -14139,6 +14291,7 @@ define void @s_shuffle_v3p3_v4p3__7_u_7() {
;
; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_7:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll
index fa422e48bbce0..830d1a1c7fef8 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll
@@ -102,37 +102,41 @@ define void @v_shuffle_v4bf16_v3bf16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v3bf16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -202,37 +206,41 @@ define void @v_shuffle_v4bf16_v3bf16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v3bf16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -4458,37 +4466,41 @@ define void @v_shuffle_v4bf16_v3bf16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v3bf16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -7390,6 +7402,7 @@ define void @s_shuffle_v4bf16_v3bf16__1_u_u_u() {
define void @s_shuffle_v4bf16_v3bf16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7402,6 +7415,7 @@ define void @s_shuffle_v4bf16_v3bf16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7414,6 +7428,7 @@ define void @s_shuffle_v4bf16_v3bf16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7493,6 +7508,7 @@ define void @s_shuffle_v4bf16_v3bf16__4_u_u_u() {
define void @s_shuffle_v4bf16_v3bf16__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7505,6 +7521,7 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7517,6 +7534,7 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -11543,6 +11561,7 @@ define void @s_shuffle_v4bf16_v3bf16__1_3_3_3() {
define void @s_shuffle_v4bf16_v3bf16__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -11555,6 +11574,7 @@ define void @s_shuffle_v4bf16_v3bf16__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -11567,6 +11587,7 @@ define void @s_shuffle_v4bf16_v3bf16__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll
index ab297c02fe3b5..a9427d66595e0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll
@@ -99,37 +99,41 @@ define void @v_shuffle_v4bf16_v4bf16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v4bf16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -237,37 +241,41 @@ define void @v_shuffle_v4bf16_v4bf16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v4bf16__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -7087,37 +7095,41 @@ define void @v_shuffle_v4bf16_v4bf16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4bf16_v4bf16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x bfloat> asm "; def $0", "=v"()
@@ -12198,6 +12210,7 @@ define void @s_shuffle_v4bf16_v4bf16__1_u_u_u() {
define void @s_shuffle_v4bf16_v4bf16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12210,6 +12223,7 @@ define void @s_shuffle_v4bf16_v4bf16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12222,6 +12236,7 @@ define void @s_shuffle_v4bf16_v4bf16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12339,6 +12354,7 @@ define void @s_shuffle_v4bf16_v4bf16__5_u_u_u() {
define void @s_shuffle_v4bf16_v4bf16__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12351,6 +12367,7 @@ define void @s_shuffle_v4bf16_v4bf16__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12363,6 +12380,7 @@ define void @s_shuffle_v4bf16_v4bf16__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -19217,6 +19235,7 @@ define void @s_shuffle_v4bf16_v4bf16__1_4_4_4() {
define void @s_shuffle_v4bf16_v4bf16__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -19229,6 +19248,7 @@ define void @s_shuffle_v4bf16_v4bf16__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -19241,6 +19261,7 @@ define void @s_shuffle_v4bf16_v4bf16__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll
index e91433ac4c1f7..826ae71ee386a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll
@@ -102,37 +102,41 @@ define void @v_shuffle_v4f16_v3f16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v3f16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v3f16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v3f16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -202,37 +206,41 @@ define void @v_shuffle_v4f16_v3f16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v3f16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v3f16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v3f16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -4458,37 +4466,41 @@ define void @v_shuffle_v4f16_v3f16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v3f16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v3f16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v3f16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -7390,6 +7402,7 @@ define void @s_shuffle_v4f16_v3f16__1_u_u_u() {
define void @s_shuffle_v4f16_v3f16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f16_v3f16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7402,6 +7415,7 @@ define void @s_shuffle_v4f16_v3f16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v3f16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7414,6 +7428,7 @@ define void @s_shuffle_v4f16_v3f16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7493,6 +7508,7 @@ define void @s_shuffle_v4f16_v3f16__4_u_u_u() {
define void @s_shuffle_v4f16_v3f16__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f16_v3f16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7505,6 +7521,7 @@ define void @s_shuffle_v4f16_v3f16__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v3f16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7517,6 +7534,7 @@ define void @s_shuffle_v4f16_v3f16__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -11543,6 +11561,7 @@ define void @s_shuffle_v4f16_v3f16__1_3_3_3() {
define void @s_shuffle_v4f16_v3f16__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4f16_v3f16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -11555,6 +11574,7 @@ define void @s_shuffle_v4f16_v3f16__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v3f16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -11567,6 +11587,7 @@ define void @s_shuffle_v4f16_v3f16__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll
index 47100b9983559..1805854ef7206 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll
@@ -99,37 +99,41 @@ define void @v_shuffle_v4f16_v4f16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v4f16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v4f16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v4f16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -237,37 +241,41 @@ define void @v_shuffle_v4f16_v4f16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v4f16__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v4f16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v4f16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -7087,37 +7095,41 @@ define void @v_shuffle_v4f16_v4f16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f16_v4f16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f16_v4f16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f16_v4f16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x half> asm "; def $0", "=v"()
@@ -12198,6 +12210,7 @@ define void @s_shuffle_v4f16_v4f16__1_u_u_u() {
define void @s_shuffle_v4f16_v4f16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f16_v4f16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12210,6 +12223,7 @@ define void @s_shuffle_v4f16_v4f16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v4f16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12222,6 +12236,7 @@ define void @s_shuffle_v4f16_v4f16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12339,6 +12354,7 @@ define void @s_shuffle_v4f16_v4f16__5_u_u_u() {
define void @s_shuffle_v4f16_v4f16__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f16_v4f16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12351,6 +12367,7 @@ define void @s_shuffle_v4f16_v4f16__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v4f16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12363,6 +12380,7 @@ define void @s_shuffle_v4f16_v4f16__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -19217,6 +19235,7 @@ define void @s_shuffle_v4f16_v4f16__1_4_4_4() {
define void @s_shuffle_v4f16_v4f16__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4f16_v4f16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -19229,6 +19248,7 @@ define void @s_shuffle_v4f16_v4f16__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f16_v4f16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -19241,6 +19261,7 @@ define void @s_shuffle_v4f16_v4f16__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
index df148f299a165..4a37da3ca5cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4f32_v2f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v2f32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -112,35 +116,39 @@ define void @v_shuffle_v4f32_v2f32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v2f32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -271,28 +279,30 @@ define void @v_shuffle_v4f32_v2f32__3_2_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v2f32__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -2062,35 +2072,39 @@ define void @v_shuffle_v4f32_v2f32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v2f32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -2365,43 +2379,47 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v2f32__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x float> asm "; def $0", "=v"()
@@ -3195,6 +3213,7 @@ define void @s_shuffle_v4f32_v2f32__0_u_u_u() {
define void @s_shuffle_v4f32_v2f32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3207,6 +3226,7 @@ define void @s_shuffle_v4f32_v2f32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3219,6 +3239,7 @@ define void @s_shuffle_v4f32_v2f32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3251,6 +3272,7 @@ define void @s_shuffle_v4f32_v2f32__2_u_u_u() {
define void @s_shuffle_v4f32_v2f32__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3263,6 +3285,7 @@ define void @s_shuffle_v4f32_v2f32__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3275,6 +3298,7 @@ define void @s_shuffle_v4f32_v2f32__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3294,6 +3318,7 @@ define void @s_shuffle_v4f32_v2f32__3_u_u_u() {
define void @s_shuffle_v4f32_v2f32__3_0_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__3_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3310,6 +3335,7 @@ define void @s_shuffle_v4f32_v2f32__3_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__3_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3326,6 +3352,7 @@ define void @s_shuffle_v4f32_v2f32__3_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3401,6 +3428,7 @@ define void @s_shuffle_v4f32_v2f32__3_1_u_u() {
define void @s_shuffle_v4f32_v2f32__3_2_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__3_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3414,6 +3442,7 @@ define void @s_shuffle_v4f32_v2f32__3_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3427,6 +3456,7 @@ define void @s_shuffle_v4f32_v2f32__3_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4723,6 +4753,7 @@ define void @s_shuffle_v4f32_v2f32__0_2_2_2() {
define void @s_shuffle_v4f32_v2f32__1_2_2_2() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4735,6 +4766,7 @@ define void @s_shuffle_v4f32_v2f32__1_2_2_2() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4747,6 +4779,7 @@ define void @s_shuffle_v4f32_v2f32__1_2_2_2() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4922,6 +4955,7 @@ define void @s_shuffle_v4f32_v2f32__3_3_2_2() {
define void @s_shuffle_v4f32_v2f32__3_3_u_2() {
; GFX900-LABEL: s_shuffle_v4f32_v2f32__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4936,6 +4970,7 @@ define void @s_shuffle_v4f32_v2f32__3_3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v2f32__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4950,6 +4985,7 @@ define void @s_shuffle_v4f32_v2f32__3_3_u_2() {
;
; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
index d4ee6fa20cad8..ed2998ca2fef0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4f32_v3f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -101,11 +105,12 @@ define void @v_shuffle_v4f32_v3f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -113,11 +118,12 @@ define void @v_shuffle_v4f32_v3f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -125,11 +131,12 @@ define void @v_shuffle_v4f32_v3f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -154,35 +161,39 @@ define void @v_shuffle_v4f32_v3f32__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -197,11 +208,12 @@ define void @v_shuffle_v4f32_v3f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -209,11 +221,12 @@ define void @v_shuffle_v4f32_v3f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -221,11 +234,12 @@ define void @v_shuffle_v4f32_v3f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -255,15 +269,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -271,15 +286,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -347,15 +363,16 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -363,15 +380,16 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -379,15 +397,16 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -412,28 +431,30 @@ define void @v_shuffle_v4f32_v3f32__5_3_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -491,12 +512,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -504,12 +526,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -517,12 +540,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -537,15 +561,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -553,15 +579,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -569,16 +597,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -609,16 +638,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,17 +656,18 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -705,41 +736,47 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v3f32__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -765,29 +802,32 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1179,29 +1219,32 @@ define void @v_shuffle_v4f32_v3f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1374,29 +1417,32 @@ define void @v_shuffle_v4f32_v3f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1529,16 +1575,17 @@ define void @v_shuffle_v4f32_v3f32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1546,16 +1593,17 @@ define void @v_shuffle_v4f32_v3f32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,16 +1611,17 @@ define void @v_shuffle_v4f32_v3f32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1884,16 +1933,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1901,16 +1951,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1918,16 +1969,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2545,16 +2597,17 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2562,17 +2615,18 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2876,16 +2930,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2893,15 +2948,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2909,16 +2966,18 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3268,29 +3327,31 @@ define void @v_shuffle_v4f32_v3f32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3416,8 +3477,9 @@ define void @v_shuffle_v4f32_v3f32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -3433,6 +3495,7 @@ define void @v_shuffle_v4f32_v3f32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -3871,16 +3934,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3888,16 +3952,17 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3986,14 +4051,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
@@ -4004,14 +4070,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
@@ -4108,8 +4175,9 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -4126,6 +4194,7 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -4200,35 +4269,39 @@ define void @v_shuffle_v4f32_v3f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4242,11 +4315,12 @@ define void @v_shuffle_v4f32_v3f32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4254,11 +4328,12 @@ define void @v_shuffle_v4f32_v3f32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4266,11 +4341,12 @@ define void @v_shuffle_v4f32_v3f32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4395,36 +4471,39 @@ define void @v_shuffle_v4f32_v3f32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v3f32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
@@ -4432,7 +4511,7 @@ define void @v_shuffle_v4f32_v3f32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4726,43 +4805,47 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v3f32__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5375,29 +5458,32 @@ define void @v_shuffle_v4f32_v3f32__5_u_4_4(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5684,40 +5770,45 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6083,8 +6174,9 @@ define void @v_shuffle_v4f32_v3f32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
@@ -6101,6 +6193,7 @@ define void @v_shuffle_v4f32_v3f32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
@@ -6237,29 +6330,31 @@ define void @v_shuffle_v4f32_v3f32__4_5_5_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__4_5_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_5_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6605,13 +6700,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6619,13 +6715,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6721,9 +6818,11 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6739,9 +6838,11 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -6877,6 +6978,7 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
@@ -6891,6 +6993,7 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
@@ -6966,6 +7069,7 @@ define void @s_shuffle_v4f32_v3f32__0_u_u_u() {
define void @s_shuffle_v4f32_v3f32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6978,6 +7082,7 @@ define void @s_shuffle_v4f32_v3f32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6990,6 +7095,7 @@ define void @s_shuffle_v4f32_v3f32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7008,6 +7114,7 @@ define void @s_shuffle_v4f32_v3f32__1_u_u_u() {
define void @s_shuffle_v4f32_v3f32__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7020,6 +7127,7 @@ define void @s_shuffle_v4f32_v3f32__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7032,6 +7140,7 @@ define void @s_shuffle_v4f32_v3f32__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7064,6 +7173,7 @@ define void @s_shuffle_v4f32_v3f32__3_u_u_u() {
define void @s_shuffle_v4f32_v3f32__4_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__4_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7076,6 +7186,7 @@ define void @s_shuffle_v4f32_v3f32__4_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7088,6 +7199,7 @@ define void @s_shuffle_v4f32_v3f32__4_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7107,6 +7219,7 @@ define void @s_shuffle_v4f32_v3f32__4_u_u_u() {
define void @s_shuffle_v4f32_v3f32__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7119,6 +7232,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7131,6 +7245,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7150,14 +7265,15 @@ define void @s_shuffle_v4f32_v3f32__5_u_u_u() {
define void @s_shuffle_v4f32_v3f32__5_0_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7166,14 +7282,15 @@ define void @s_shuffle_v4f32_v3f32__5_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7182,6 +7299,7 @@ define void @s_shuffle_v4f32_v3f32__5_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7257,14 +7375,15 @@ define void @s_shuffle_v4f32_v3f32__5_1_u_u() {
define void @s_shuffle_v4f32_v3f32__5_2_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7273,14 +7392,15 @@ define void @s_shuffle_v4f32_v3f32__5_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7289,6 +7409,7 @@ define void @s_shuffle_v4f32_v3f32__5_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7312,6 +7433,7 @@ define void @s_shuffle_v4f32_v3f32__5_2_u_u() {
define void @s_shuffle_v4f32_v3f32__5_3_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7325,6 +7447,7 @@ define void @s_shuffle_v4f32_v3f32__5_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7338,6 +7461,7 @@ define void @s_shuffle_v4f32_v3f32__5_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7377,6 +7501,7 @@ define void @s_shuffle_v4f32_v3f32__5_4_u_u() {
define void @s_shuffle_v4f32_v3f32__5_5_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7390,6 +7515,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7403,6 +7529,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7423,15 +7550,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_u() {
define void @s_shuffle_v4f32_v3f32__5_5_0_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7440,15 +7568,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7457,6 +7586,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7481,15 +7611,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_u() {
define void @s_shuffle_v4f32_v3f32__5_5_1_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7498,15 +7629,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7515,6 +7647,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7594,6 +7727,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_u() {
define void @s_shuffle_v4f32_v3f32__5_5_3_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7608,6 +7742,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7622,6 +7757,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7643,6 +7779,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_u() {
define void @s_shuffle_v4f32_v3f32__5_5_4_u() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7657,6 +7794,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7671,6 +7809,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8011,6 +8150,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_5() {
define void @s_shuffle_v4f32_v3f32__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8025,6 +8165,7 @@ define void @s_shuffle_v4f32_v3f32__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8039,6 +8180,7 @@ define void @s_shuffle_v4f32_v3f32__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8181,6 +8323,7 @@ define void @s_shuffle_v4f32_v3f32__2_0_0_0() {
define void @s_shuffle_v4f32_v3f32__3_0_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__3_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8195,6 +8338,7 @@ define void @s_shuffle_v4f32_v3f32__3_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8209,6 +8353,7 @@ define void @s_shuffle_v4f32_v3f32__3_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8351,14 +8496,15 @@ define void @s_shuffle_v4f32_v3f32__5_0_0_0() {
define void @s_shuffle_v4f32_v3f32__5_u_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -8368,14 +8514,15 @@ define void @s_shuffle_v4f32_v3f32__5_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -8385,6 +8532,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8711,15 +8859,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_0() {
define void @s_shuffle_v4f32_v3f32__5_5_u_0() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -8728,15 +8877,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -8745,6 +8895,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9226,14 +9377,15 @@ define void @s_shuffle_v4f32_v3f32__5_1_1_1() {
define void @s_shuffle_v4f32_v3f32__5_u_1_1() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -9243,14 +9395,15 @@ define void @s_shuffle_v4f32_v3f32__5_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -9260,6 +9413,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9586,15 +9740,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_1() {
define void @s_shuffle_v4f32_v3f32__5_5_u_1() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -9603,15 +9758,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -9620,6 +9776,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_1() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10446,15 +10603,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_2() {
define void @s_shuffle_v4f32_v3f32__5_5_u_2() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -10463,15 +10621,16 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -10480,6 +10639,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_2() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10802,6 +10962,7 @@ define void @s_shuffle_v4f32_v3f32__0_3_3_3() {
define void @s_shuffle_v4f32_v3f32__1_3_3_3() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__1_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10814,6 +10975,7 @@ define void @s_shuffle_v4f32_v3f32__1_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10826,6 +10988,7 @@ define void @s_shuffle_v4f32_v3f32__1_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10844,6 +11007,7 @@ define void @s_shuffle_v4f32_v3f32__1_3_3_3() {
define void @s_shuffle_v4f32_v3f32__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10856,6 +11020,7 @@ define void @s_shuffle_v4f32_v3f32__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10868,6 +11033,7 @@ define void @s_shuffle_v4f32_v3f32__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11004,6 +11170,7 @@ define void @s_shuffle_v4f32_v3f32__5_3_3_3() {
define void @s_shuffle_v4f32_v3f32__5_u_3_3() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11018,6 +11185,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11032,6 +11200,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_3_3() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11337,6 +11506,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_3() {
define void @s_shuffle_v4f32_v3f32__5_5_u_3() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11351,6 +11521,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11365,6 +11536,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_3() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11874,6 +12046,7 @@ define void @s_shuffle_v4f32_v3f32__5_4_4_4() {
define void @s_shuffle_v4f32_v3f32__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11888,6 +12061,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11902,6 +12076,7 @@ define void @s_shuffle_v4f32_v3f32__5_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12207,6 +12382,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_4() {
define void @s_shuffle_v4f32_v3f32__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -12221,6 +12397,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -12235,6 +12412,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12989,6 +13167,7 @@ define void @s_shuffle_v4f32_v3f32__5_4_5_5() {
define void @s_shuffle_v4f32_v3f32__5_5_u_5() {
; GFX900-LABEL: s_shuffle_v4f32_v3f32__5_5_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -13003,6 +13182,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v3f32__5_5_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -13017,6 +13197,7 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_5() {
;
; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
index edc540edb3ad1..8242805658876 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v4f32_v4f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v4f32_v4f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v4f32_v4f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -103,33 +106,37 @@ define void @v_shuffle_v4f32_v4f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -141,37 +148,41 @@ define void @v_shuffle_v4f32_v4f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -198,9 +209,10 @@ define void @v_shuffle_v4f32_v4f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -210,9 +222,10 @@ define void @v_shuffle_v4f32_v4f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -222,9 +235,10 @@ define void @v_shuffle_v4f32_v4f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -241,33 +255,37 @@ define void @v_shuffle_v4f32_v4f32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -280,37 +298,41 @@ define void @v_shuffle_v4f32_v4f32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -327,13 +349,14 @@ define void @v_shuffle_v4f32_v4f32__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -434,13 +457,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -484,49 +508,53 @@ define void @v_shuffle_v4f32_v4f32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -539,14 +567,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,14 +655,15 @@ define void @v_shuffle_v4f32_v4f32__7_5_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,40 +700,44 @@ define void @v_shuffle_v4f32_v4f32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -720,14 +754,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +772,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +790,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -779,14 +815,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -796,14 +833,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -813,15 +851,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -890,52 +928,56 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -948,41 +990,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -995,40 +1043,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1087,43 +1142,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1578,11 +1637,12 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1592,11 +1652,12 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1605,12 +1666,13 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1820,11 +1882,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,11 +1897,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1848,11 +1912,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2050,14 +2115,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2067,14 +2133,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2084,15 +2151,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2529,14 +2596,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,14 +2614,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2563,15 +2632,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3400,14 +3469,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3417,14 +3487,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3434,15 +3505,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3877,14 +3948,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3894,14 +3966,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3911,15 +3984,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5196,48 +5269,52 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v11, v2
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v11, v2
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6974,9 +7051,10 @@ define void @v_shuffle_v4f32_v4f32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6986,9 +7064,10 @@ define void @v_shuffle_v4f32_v4f32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6998,9 +7077,10 @@ define void @v_shuffle_v4f32_v4f32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7016,33 +7096,37 @@ define void @v_shuffle_v4f32_v4f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7054,37 +7138,41 @@ define void @v_shuffle_v4f32_v4f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7256,43 +7344,47 @@ define void @v_shuffle_v4f32_v4f32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7694,41 +7786,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8548,43 +8646,47 @@ define void @v_shuffle_v4f32_v4f32__7_5_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8980,40 +9082,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10248,43 +10357,47 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_6(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4f32_v4f32__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11967,6 +12080,7 @@ define void @s_shuffle_v4f32_v4f32__0_u_u_u() {
define void @s_shuffle_v4f32_v4f32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11979,6 +12093,7 @@ define void @s_shuffle_v4f32_v4f32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11991,6 +12106,7 @@ define void @s_shuffle_v4f32_v4f32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12009,6 +12125,7 @@ define void @s_shuffle_v4f32_v4f32__1_u_u_u() {
define void @s_shuffle_v4f32_v4f32__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12021,6 +12138,7 @@ define void @s_shuffle_v4f32_v4f32__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12033,6 +12151,7 @@ define void @s_shuffle_v4f32_v4f32__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12051,6 +12170,7 @@ define void @s_shuffle_v4f32_v4f32__2_u_u_u() {
define void @s_shuffle_v4f32_v4f32__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12063,6 +12183,7 @@ define void @s_shuffle_v4f32_v4f32__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12075,6 +12196,7 @@ define void @s_shuffle_v4f32_v4f32__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12107,6 +12229,7 @@ define void @s_shuffle_v4f32_v4f32__4_u_u_u() {
define void @s_shuffle_v4f32_v4f32__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12119,6 +12242,7 @@ define void @s_shuffle_v4f32_v4f32__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12131,6 +12255,7 @@ define void @s_shuffle_v4f32_v4f32__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12150,6 +12275,7 @@ define void @s_shuffle_v4f32_v4f32__5_u_u_u() {
define void @s_shuffle_v4f32_v4f32__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12162,6 +12288,7 @@ define void @s_shuffle_v4f32_v4f32__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12174,6 +12301,7 @@ define void @s_shuffle_v4f32_v4f32__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12193,6 +12321,7 @@ define void @s_shuffle_v4f32_v4f32__6_u_u_u() {
define void @s_shuffle_v4f32_v4f32__7_u_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12205,6 +12334,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12217,6 +12347,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12236,14 +12367,15 @@ define void @s_shuffle_v4f32_v4f32__7_u_u_u() {
define void @s_shuffle_v4f32_v4f32__7_0_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12252,14 +12384,15 @@ define void @s_shuffle_v4f32_v4f32__7_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12268,6 +12401,7 @@ define void @s_shuffle_v4f32_v4f32__7_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12343,14 +12477,15 @@ define void @s_shuffle_v4f32_v4f32__7_1_u_u() {
define void @s_shuffle_v4f32_v4f32__7_2_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12359,14 +12494,15 @@ define void @s_shuffle_v4f32_v4f32__7_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12375,6 +12511,7 @@ define void @s_shuffle_v4f32_v4f32__7_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12398,14 +12535,15 @@ define void @s_shuffle_v4f32_v4f32__7_2_u_u() {
define void @s_shuffle_v4f32_v4f32__7_3_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12414,14 +12552,15 @@ define void @s_shuffle_v4f32_v4f32__7_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12430,6 +12569,7 @@ define void @s_shuffle_v4f32_v4f32__7_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12453,6 +12593,7 @@ define void @s_shuffle_v4f32_v4f32__7_3_u_u() {
define void @s_shuffle_v4f32_v4f32__7_4_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12466,6 +12607,7 @@ define void @s_shuffle_v4f32_v4f32__7_4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12479,6 +12621,7 @@ define void @s_shuffle_v4f32_v4f32__7_4_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12518,6 +12661,7 @@ define void @s_shuffle_v4f32_v4f32__7_5_u_u() {
define void @s_shuffle_v4f32_v4f32__7_6_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12531,6 +12675,7 @@ define void @s_shuffle_v4f32_v4f32__7_6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12544,6 +12689,7 @@ define void @s_shuffle_v4f32_v4f32__7_6_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12564,6 +12710,7 @@ define void @s_shuffle_v4f32_v4f32__7_6_u_u() {
define void @s_shuffle_v4f32_v4f32__7_7_u_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12577,6 +12724,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12590,6 +12738,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12610,15 +12759,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_u() {
define void @s_shuffle_v4f32_v4f32__7_7_0_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12627,15 +12777,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12644,6 +12795,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12668,15 +12820,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_u() {
define void @s_shuffle_v4f32_v4f32__7_7_1_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12685,15 +12838,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12702,6 +12856,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12781,15 +12936,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_u() {
define void @s_shuffle_v4f32_v4f32__7_7_3_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12798,15 +12954,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12815,6 +12972,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12839,6 +12997,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_u() {
define void @s_shuffle_v4f32_v4f32__7_7_4_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12853,6 +13012,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12867,6 +13027,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12888,6 +13049,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_u() {
define void @s_shuffle_v4f32_v4f32__7_7_5_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12902,6 +13064,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12916,6 +13079,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12957,6 +13121,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_u() {
define void @s_shuffle_v4f32_v4f32__7_7_7_u() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12971,6 +13136,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12985,6 +13151,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_u() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13424,6 +13591,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_7() {
define void @s_shuffle_v4f32_v4f32__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13438,6 +13606,7 @@ define void @s_shuffle_v4f32_v4f32__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13452,6 +13621,7 @@ define void @s_shuffle_v4f32_v4f32__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13645,6 +13815,7 @@ define void @s_shuffle_v4f32_v4f32__3_0_0_0() {
define void @s_shuffle_v4f32_v4f32__4_0_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__4_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13659,6 +13830,7 @@ define void @s_shuffle_v4f32_v4f32__4_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__4_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13673,6 +13845,7 @@ define void @s_shuffle_v4f32_v4f32__4_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__4_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13876,14 +14049,15 @@ define void @s_shuffle_v4f32_v4f32__7_0_0_0() {
define void @s_shuffle_v4f32_v4f32__7_u_0_0() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -13893,14 +14067,15 @@ define void @s_shuffle_v4f32_v4f32__7_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -13910,6 +14085,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14358,15 +14534,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_0() {
define void @s_shuffle_v4f32_v4f32__7_7_u_0() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -14375,15 +14552,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -14392,6 +14570,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_0() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15070,14 +15249,15 @@ define void @s_shuffle_v4f32_v4f32__7_1_1_1() {
define void @s_shuffle_v4f32_v4f32__7_u_1_1() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -15087,14 +15267,15 @@ define void @s_shuffle_v4f32_v4f32__7_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -15104,6 +15285,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15552,15 +15734,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_1() {
define void @s_shuffle_v4f32_v4f32__7_7_u_1() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -15569,15 +15752,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -15586,6 +15770,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_1() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -16725,15 +16910,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_2() {
define void @s_shuffle_v4f32_v4f32__7_7_u_2() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -16742,15 +16928,16 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -16759,6 +16946,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_2() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18358,6 +18546,7 @@ define void @s_shuffle_v4f32_v4f32__0_4_4_4() {
define void @s_shuffle_v4f32_v4f32__1_4_4_4() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__1_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18370,6 +18559,7 @@ define void @s_shuffle_v4f32_v4f32__1_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__1_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18382,6 +18572,7 @@ define void @s_shuffle_v4f32_v4f32__1_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18400,6 +18591,7 @@ define void @s_shuffle_v4f32_v4f32__1_4_4_4() {
define void @s_shuffle_v4f32_v4f32__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18412,6 +18604,7 @@ define void @s_shuffle_v4f32_v4f32__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18424,6 +18617,7 @@ define void @s_shuffle_v4f32_v4f32__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18442,6 +18636,7 @@ define void @s_shuffle_v4f32_v4f32__2_4_4_4() {
define void @s_shuffle_v4f32_v4f32__3_4_4_4() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18454,6 +18649,7 @@ define void @s_shuffle_v4f32_v4f32__3_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18466,6 +18662,7 @@ define void @s_shuffle_v4f32_v4f32__3_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18654,6 +18851,7 @@ define void @s_shuffle_v4f32_v4f32__7_4_4_4() {
define void @s_shuffle_v4f32_v4f32__7_u_4_4() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18668,6 +18866,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18682,6 +18881,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19100,6 +19300,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_4() {
define void @s_shuffle_v4f32_v4f32__7_7_u_4() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19114,6 +19315,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19128,6 +19330,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_4() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19829,6 +20032,7 @@ define void @s_shuffle_v4f32_v4f32__7_5_5_5() {
define void @s_shuffle_v4f32_v4f32__7_u_5_5() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19843,6 +20047,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_5_5() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19857,6 +20062,7 @@ define void @s_shuffle_v4f32_v4f32__7_u_5_5() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -20275,6 +20481,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_5() {
define void @s_shuffle_v4f32_v4f32__7_7_u_5() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -20289,6 +20496,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -20303,6 +20511,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_5() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -21349,6 +21558,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_6() {
define void @s_shuffle_v4f32_v4f32__7_7_u_6() {
; GFX900-LABEL: s_shuffle_v4f32_v4f32__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -21363,6 +21573,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_6() {
;
; GFX90A-LABEL: s_shuffle_v4f32_v4f32__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -21377,6 +21588,7 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_6() {
;
; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll
index 7b3a5a879f44f..8336b63c1088b 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll
@@ -102,37 +102,41 @@ define void @v_shuffle_v4i16_v3i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v3i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -202,37 +206,41 @@ define void @v_shuffle_v4i16_v3i16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v3i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -4458,37 +4466,41 @@ define void @v_shuffle_v4i16_v3i16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v3i16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -7390,6 +7402,7 @@ define void @s_shuffle_v4i16_v3i16__1_u_u_u() {
define void @s_shuffle_v4i16_v3i16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7402,6 +7415,7 @@ define void @s_shuffle_v4i16_v3i16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7414,6 +7428,7 @@ define void @s_shuffle_v4i16_v3i16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -7493,6 +7508,7 @@ define void @s_shuffle_v4i16_v3i16__4_u_u_u() {
define void @s_shuffle_v4i16_v3i16__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -7505,6 +7521,7 @@ define void @s_shuffle_v4i16_v3i16__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -7517,6 +7534,7 @@ define void @s_shuffle_v4i16_v3i16__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -11390,6 +11408,7 @@ define void @s_shuffle_v4i16_v3i16__1_3_3_3() {
define void @s_shuffle_v4i16_v3i16__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -11402,6 +11421,7 @@ define void @s_shuffle_v4i16_v3i16__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -11414,6 +11434,7 @@ define void @s_shuffle_v4i16_v3i16__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll
index 2a371b7c7d2d3..953603b990d4b 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll
@@ -99,37 +99,41 @@ define void @v_shuffle_v4i16_v4i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v4i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -237,37 +241,41 @@ define void @v_shuffle_v4i16_v4i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v4i16__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -7087,37 +7095,41 @@ define void @v_shuffle_v4i16_v4i16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i16_v4i16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:1]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i16> asm "; def $0", "=v"()
@@ -12198,6 +12210,7 @@ define void @s_shuffle_v4i16_v4i16__1_u_u_u() {
define void @s_shuffle_v4i16_v4i16__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12210,6 +12223,7 @@ define void @s_shuffle_v4i16_v4i16__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12222,6 +12236,7 @@ define void @s_shuffle_v4i16_v4i16__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -12339,6 +12354,7 @@ define void @s_shuffle_v4i16_v4i16__5_u_u_u() {
define void @s_shuffle_v4i16_v4i16__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -12351,6 +12367,7 @@ define void @s_shuffle_v4i16_v4i16__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -12363,6 +12380,7 @@ define void @s_shuffle_v4i16_v4i16__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -18740,6 +18758,7 @@ define void @s_shuffle_v4i16_v4i16__1_4_4_4() {
define void @s_shuffle_v4i16_v4i16__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -18752,6 +18771,7 @@ define void @s_shuffle_v4i16_v4i16__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -18764,6 +18784,7 @@ define void @s_shuffle_v4i16_v4i16__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
index 9d3affa6da266..a297452d7ca47 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4i32_v2i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v2i32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -112,35 +116,39 @@ define void @v_shuffle_v4i32_v2i32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v2i32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -271,28 +279,30 @@ define void @v_shuffle_v4i32_v2i32__3_2_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v2i32__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -2068,35 +2078,39 @@ define void @v_shuffle_v4i32_v2i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v2i32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -2371,43 +2385,47 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v2i32__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i32> asm "; def $0", "=v"()
@@ -3201,6 +3219,7 @@ define void @s_shuffle_v4i32_v2i32__0_u_u_u() {
define void @s_shuffle_v4i32_v2i32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3213,6 +3232,7 @@ define void @s_shuffle_v4i32_v2i32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3225,6 +3245,7 @@ define void @s_shuffle_v4i32_v2i32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3257,6 +3278,7 @@ define void @s_shuffle_v4i32_v2i32__2_u_u_u() {
define void @s_shuffle_v4i32_v2i32__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3269,6 +3291,7 @@ define void @s_shuffle_v4i32_v2i32__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3281,6 +3304,7 @@ define void @s_shuffle_v4i32_v2i32__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3300,6 +3324,7 @@ define void @s_shuffle_v4i32_v2i32__3_u_u_u() {
define void @s_shuffle_v4i32_v2i32__3_0_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__3_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3316,6 +3341,7 @@ define void @s_shuffle_v4i32_v2i32__3_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__3_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3332,6 +3358,7 @@ define void @s_shuffle_v4i32_v2i32__3_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3407,6 +3434,7 @@ define void @s_shuffle_v4i32_v2i32__3_1_u_u() {
define void @s_shuffle_v4i32_v2i32__3_2_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__3_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3420,6 +3448,7 @@ define void @s_shuffle_v4i32_v2i32__3_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3433,6 +3462,7 @@ define void @s_shuffle_v4i32_v2i32__3_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4731,6 +4761,7 @@ define void @s_shuffle_v4i32_v2i32__0_2_2_2() {
define void @s_shuffle_v4i32_v2i32__1_2_2_2() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4743,6 +4774,7 @@ define void @s_shuffle_v4i32_v2i32__1_2_2_2() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4755,6 +4787,7 @@ define void @s_shuffle_v4i32_v2i32__1_2_2_2() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4930,6 +4963,7 @@ define void @s_shuffle_v4i32_v2i32__3_3_2_2() {
define void @s_shuffle_v4i32_v2i32__3_3_u_2() {
; GFX900-LABEL: s_shuffle_v4i32_v2i32__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4944,6 +4978,7 @@ define void @s_shuffle_v4i32_v2i32__3_3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v2i32__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4958,6 +4993,7 @@ define void @s_shuffle_v4i32_v2i32__3_3_u_2() {
;
; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
index 1a669adf2b635..8ee15c1a9c3f0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4i32_v3i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -101,11 +105,12 @@ define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -113,11 +118,12 @@ define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -125,11 +131,12 @@ define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -154,35 +161,39 @@ define void @v_shuffle_v4i32_v3i32__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -197,11 +208,12 @@ define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -209,11 +221,12 @@ define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -221,11 +234,12 @@ define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -255,15 +269,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -271,15 +286,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -347,15 +363,16 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -363,15 +380,16 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -379,15 +397,16 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -412,28 +431,30 @@ define void @v_shuffle_v4i32_v3i32__5_3_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -491,12 +512,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -504,12 +526,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -517,12 +540,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -537,15 +561,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -553,15 +579,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -569,16 +597,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -609,16 +638,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,17 +656,18 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -705,41 +736,47 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v3i32__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -765,29 +802,32 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1179,29 +1219,32 @@ define void @v_shuffle_v4i32_v3i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1374,29 +1417,32 @@ define void @v_shuffle_v4i32_v3i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1529,16 +1575,17 @@ define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1546,16 +1593,17 @@ define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,16 +1611,17 @@ define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1884,16 +1933,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1901,16 +1951,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1918,16 +1969,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2545,16 +2597,17 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2562,17 +2615,18 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2876,16 +2930,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2893,15 +2948,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2909,16 +2966,18 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3268,29 +3327,31 @@ define void @v_shuffle_v4i32_v3i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3416,8 +3477,9 @@ define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -3433,6 +3495,7 @@ define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -3871,16 +3934,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3888,16 +3952,17 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3986,14 +4051,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
@@ -4004,14 +4070,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
@@ -4108,8 +4175,9 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -4126,6 +4194,7 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -4200,35 +4269,39 @@ define void @v_shuffle_v4i32_v3i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4242,11 +4315,12 @@ define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4254,11 +4328,12 @@ define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4266,11 +4341,12 @@ define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4395,36 +4471,39 @@ define void @v_shuffle_v4i32_v3i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
@@ -4432,7 +4511,7 @@ define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4726,43 +4805,47 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v3i32__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5375,29 +5458,32 @@ define void @v_shuffle_v4i32_v3i32__5_u_4_4(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5684,40 +5770,45 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6083,8 +6174,9 @@ define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
@@ -6101,6 +6193,7 @@ define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
@@ -6237,29 +6330,31 @@ define void @v_shuffle_v4i32_v3i32__4_5_5_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6605,13 +6700,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6619,13 +6715,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6721,9 +6818,11 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6739,9 +6838,11 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -6877,6 +6978,7 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
@@ -6891,6 +6993,7 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
@@ -6966,6 +7069,7 @@ define void @s_shuffle_v4i32_v3i32__0_u_u_u() {
define void @s_shuffle_v4i32_v3i32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6978,6 +7082,7 @@ define void @s_shuffle_v4i32_v3i32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6990,6 +7095,7 @@ define void @s_shuffle_v4i32_v3i32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7008,6 +7114,7 @@ define void @s_shuffle_v4i32_v3i32__1_u_u_u() {
define void @s_shuffle_v4i32_v3i32__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7020,6 +7127,7 @@ define void @s_shuffle_v4i32_v3i32__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7032,6 +7140,7 @@ define void @s_shuffle_v4i32_v3i32__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7064,6 +7173,7 @@ define void @s_shuffle_v4i32_v3i32__3_u_u_u() {
define void @s_shuffle_v4i32_v3i32__4_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7076,6 +7186,7 @@ define void @s_shuffle_v4i32_v3i32__4_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7088,6 +7199,7 @@ define void @s_shuffle_v4i32_v3i32__4_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7107,6 +7219,7 @@ define void @s_shuffle_v4i32_v3i32__4_u_u_u() {
define void @s_shuffle_v4i32_v3i32__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7119,6 +7232,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7131,6 +7245,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7150,14 +7265,15 @@ define void @s_shuffle_v4i32_v3i32__5_u_u_u() {
define void @s_shuffle_v4i32_v3i32__5_0_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7166,14 +7282,15 @@ define void @s_shuffle_v4i32_v3i32__5_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7182,6 +7299,7 @@ define void @s_shuffle_v4i32_v3i32__5_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7257,14 +7375,15 @@ define void @s_shuffle_v4i32_v3i32__5_1_u_u() {
define void @s_shuffle_v4i32_v3i32__5_2_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7273,14 +7392,15 @@ define void @s_shuffle_v4i32_v3i32__5_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7289,6 +7409,7 @@ define void @s_shuffle_v4i32_v3i32__5_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7312,6 +7433,7 @@ define void @s_shuffle_v4i32_v3i32__5_2_u_u() {
define void @s_shuffle_v4i32_v3i32__5_3_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7325,6 +7447,7 @@ define void @s_shuffle_v4i32_v3i32__5_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7338,6 +7461,7 @@ define void @s_shuffle_v4i32_v3i32__5_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7377,6 +7501,7 @@ define void @s_shuffle_v4i32_v3i32__5_4_u_u() {
define void @s_shuffle_v4i32_v3i32__5_5_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7390,6 +7515,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7403,6 +7529,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7423,15 +7550,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_u() {
define void @s_shuffle_v4i32_v3i32__5_5_0_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7440,15 +7568,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7457,6 +7586,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7481,15 +7611,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_u() {
define void @s_shuffle_v4i32_v3i32__5_5_1_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7498,15 +7629,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7515,6 +7647,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7594,6 +7727,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_u() {
define void @s_shuffle_v4i32_v3i32__5_5_3_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7608,6 +7742,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7622,6 +7757,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7643,6 +7779,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_u() {
define void @s_shuffle_v4i32_v3i32__5_5_4_u() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7657,6 +7794,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7671,6 +7809,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8011,6 +8150,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_5() {
define void @s_shuffle_v4i32_v3i32__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8025,6 +8165,7 @@ define void @s_shuffle_v4i32_v3i32__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8039,6 +8180,7 @@ define void @s_shuffle_v4i32_v3i32__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8181,6 +8323,7 @@ define void @s_shuffle_v4i32_v3i32__2_0_0_0() {
define void @s_shuffle_v4i32_v3i32__3_0_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8195,6 +8338,7 @@ define void @s_shuffle_v4i32_v3i32__3_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8209,6 +8353,7 @@ define void @s_shuffle_v4i32_v3i32__3_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8351,14 +8496,15 @@ define void @s_shuffle_v4i32_v3i32__5_0_0_0() {
define void @s_shuffle_v4i32_v3i32__5_u_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -8368,14 +8514,15 @@ define void @s_shuffle_v4i32_v3i32__5_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -8385,6 +8532,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8711,15 +8859,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_0() {
define void @s_shuffle_v4i32_v3i32__5_5_u_0() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -8728,15 +8877,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -8745,6 +8895,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9226,14 +9377,15 @@ define void @s_shuffle_v4i32_v3i32__5_1_1_1() {
define void @s_shuffle_v4i32_v3i32__5_u_1_1() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -9243,14 +9395,15 @@ define void @s_shuffle_v4i32_v3i32__5_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -9260,6 +9413,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9586,15 +9740,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_1() {
define void @s_shuffle_v4i32_v3i32__5_5_u_1() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -9603,15 +9758,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -9620,6 +9776,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_1() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10446,15 +10603,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_2() {
define void @s_shuffle_v4i32_v3i32__5_5_u_2() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -10463,15 +10621,16 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -10480,6 +10639,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_2() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10802,6 +10962,7 @@ define void @s_shuffle_v4i32_v3i32__0_3_3_3() {
define void @s_shuffle_v4i32_v3i32__1_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10814,6 +10975,7 @@ define void @s_shuffle_v4i32_v3i32__1_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10826,6 +10988,7 @@ define void @s_shuffle_v4i32_v3i32__1_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10844,6 +11007,7 @@ define void @s_shuffle_v4i32_v3i32__1_3_3_3() {
define void @s_shuffle_v4i32_v3i32__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10856,6 +11020,7 @@ define void @s_shuffle_v4i32_v3i32__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10868,6 +11033,7 @@ define void @s_shuffle_v4i32_v3i32__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11004,6 +11170,7 @@ define void @s_shuffle_v4i32_v3i32__5_3_3_3() {
define void @s_shuffle_v4i32_v3i32__5_u_3_3() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11018,6 +11185,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11032,6 +11200,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11337,6 +11506,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_3() {
define void @s_shuffle_v4i32_v3i32__5_5_u_3() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11351,6 +11521,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11365,6 +11536,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_3() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11874,6 +12046,7 @@ define void @s_shuffle_v4i32_v3i32__5_4_4_4() {
define void @s_shuffle_v4i32_v3i32__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11888,6 +12061,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11902,6 +12076,7 @@ define void @s_shuffle_v4i32_v3i32__5_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12207,6 +12382,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_4() {
define void @s_shuffle_v4i32_v3i32__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -12221,6 +12397,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -12235,6 +12412,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12989,6 +13167,7 @@ define void @s_shuffle_v4i32_v3i32__5_4_5_5() {
define void @s_shuffle_v4i32_v3i32__5_5_u_5() {
; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -13003,6 +13182,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -13017,6 +13197,7 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_5() {
;
; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
index 983afa566e2c1..3010c1c411a32 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v4i32_v4i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v4i32_v4i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v4i32_v4i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -103,33 +106,37 @@ define void @v_shuffle_v4i32_v4i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -141,37 +148,41 @@ define void @v_shuffle_v4i32_v4i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -198,9 +209,10 @@ define void @v_shuffle_v4i32_v4i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -210,9 +222,10 @@ define void @v_shuffle_v4i32_v4i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -222,9 +235,10 @@ define void @v_shuffle_v4i32_v4i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -241,33 +255,37 @@ define void @v_shuffle_v4i32_v4i32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -280,37 +298,41 @@ define void @v_shuffle_v4i32_v4i32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -327,13 +349,14 @@ define void @v_shuffle_v4i32_v4i32__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -434,13 +457,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -484,49 +508,53 @@ define void @v_shuffle_v4i32_v4i32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -539,14 +567,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,14 +655,15 @@ define void @v_shuffle_v4i32_v4i32__7_5_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,40 +700,44 @@ define void @v_shuffle_v4i32_v4i32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -720,14 +754,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +772,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +790,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -779,14 +815,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -796,14 +833,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -813,15 +851,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -890,52 +928,56 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -948,41 +990,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -995,40 +1043,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1087,43 +1142,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1578,11 +1637,12 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1592,11 +1652,12 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1605,12 +1666,13 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1820,11 +1882,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,11 +1897,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1848,11 +1912,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2050,14 +2115,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2067,14 +2133,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2084,15 +2151,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2529,14 +2596,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,14 +2614,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2563,15 +2632,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3400,14 +3469,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3417,14 +3487,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3434,15 +3505,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3877,14 +3948,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3894,14 +3966,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3911,15 +3984,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5196,48 +5269,52 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v11, v2
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v11, v2
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6974,9 +7051,10 @@ define void @v_shuffle_v4i32_v4i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6986,9 +7064,10 @@ define void @v_shuffle_v4i32_v4i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6998,9 +7077,10 @@ define void @v_shuffle_v4i32_v4i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7016,33 +7096,37 @@ define void @v_shuffle_v4i32_v4i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7054,37 +7138,41 @@ define void @v_shuffle_v4i32_v4i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7256,43 +7344,47 @@ define void @v_shuffle_v4i32_v4i32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7694,41 +7786,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8548,43 +8646,47 @@ define void @v_shuffle_v4i32_v4i32__7_5_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8980,40 +9082,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10248,43 +10357,47 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_6(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i32_v4i32__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11967,6 +12080,7 @@ define void @s_shuffle_v4i32_v4i32__0_u_u_u() {
define void @s_shuffle_v4i32_v4i32__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11979,6 +12093,7 @@ define void @s_shuffle_v4i32_v4i32__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11991,6 +12106,7 @@ define void @s_shuffle_v4i32_v4i32__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12009,6 +12125,7 @@ define void @s_shuffle_v4i32_v4i32__1_u_u_u() {
define void @s_shuffle_v4i32_v4i32__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12021,6 +12138,7 @@ define void @s_shuffle_v4i32_v4i32__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12033,6 +12151,7 @@ define void @s_shuffle_v4i32_v4i32__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12051,6 +12170,7 @@ define void @s_shuffle_v4i32_v4i32__2_u_u_u() {
define void @s_shuffle_v4i32_v4i32__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12063,6 +12183,7 @@ define void @s_shuffle_v4i32_v4i32__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12075,6 +12196,7 @@ define void @s_shuffle_v4i32_v4i32__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12107,6 +12229,7 @@ define void @s_shuffle_v4i32_v4i32__4_u_u_u() {
define void @s_shuffle_v4i32_v4i32__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12119,6 +12242,7 @@ define void @s_shuffle_v4i32_v4i32__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12131,6 +12255,7 @@ define void @s_shuffle_v4i32_v4i32__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12150,6 +12275,7 @@ define void @s_shuffle_v4i32_v4i32__5_u_u_u() {
define void @s_shuffle_v4i32_v4i32__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12162,6 +12288,7 @@ define void @s_shuffle_v4i32_v4i32__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12174,6 +12301,7 @@ define void @s_shuffle_v4i32_v4i32__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12193,6 +12321,7 @@ define void @s_shuffle_v4i32_v4i32__6_u_u_u() {
define void @s_shuffle_v4i32_v4i32__7_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12205,6 +12334,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12217,6 +12347,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12236,14 +12367,15 @@ define void @s_shuffle_v4i32_v4i32__7_u_u_u() {
define void @s_shuffle_v4i32_v4i32__7_0_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12252,14 +12384,15 @@ define void @s_shuffle_v4i32_v4i32__7_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12268,6 +12401,7 @@ define void @s_shuffle_v4i32_v4i32__7_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12343,14 +12477,15 @@ define void @s_shuffle_v4i32_v4i32__7_1_u_u() {
define void @s_shuffle_v4i32_v4i32__7_2_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12359,14 +12494,15 @@ define void @s_shuffle_v4i32_v4i32__7_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12375,6 +12511,7 @@ define void @s_shuffle_v4i32_v4i32__7_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12398,14 +12535,15 @@ define void @s_shuffle_v4i32_v4i32__7_2_u_u() {
define void @s_shuffle_v4i32_v4i32__7_3_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12414,14 +12552,15 @@ define void @s_shuffle_v4i32_v4i32__7_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12430,6 +12569,7 @@ define void @s_shuffle_v4i32_v4i32__7_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12453,6 +12593,7 @@ define void @s_shuffle_v4i32_v4i32__7_3_u_u() {
define void @s_shuffle_v4i32_v4i32__7_4_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12466,6 +12607,7 @@ define void @s_shuffle_v4i32_v4i32__7_4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12479,6 +12621,7 @@ define void @s_shuffle_v4i32_v4i32__7_4_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12518,6 +12661,7 @@ define void @s_shuffle_v4i32_v4i32__7_5_u_u() {
define void @s_shuffle_v4i32_v4i32__7_6_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12531,6 +12675,7 @@ define void @s_shuffle_v4i32_v4i32__7_6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12544,6 +12689,7 @@ define void @s_shuffle_v4i32_v4i32__7_6_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12564,6 +12710,7 @@ define void @s_shuffle_v4i32_v4i32__7_6_u_u() {
define void @s_shuffle_v4i32_v4i32__7_7_u_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12577,6 +12724,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12590,6 +12738,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12610,15 +12759,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_u() {
define void @s_shuffle_v4i32_v4i32__7_7_0_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12627,15 +12777,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12644,6 +12795,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12668,15 +12820,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_u() {
define void @s_shuffle_v4i32_v4i32__7_7_1_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12685,15 +12838,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12702,6 +12856,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12781,15 +12936,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_u() {
define void @s_shuffle_v4i32_v4i32__7_7_3_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12798,15 +12954,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12815,6 +12972,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12839,6 +12997,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_u() {
define void @s_shuffle_v4i32_v4i32__7_7_4_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12853,6 +13012,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12867,6 +13027,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12888,6 +13049,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_u() {
define void @s_shuffle_v4i32_v4i32__7_7_5_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12902,6 +13064,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12916,6 +13079,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12957,6 +13121,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_u() {
define void @s_shuffle_v4i32_v4i32__7_7_7_u() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12971,6 +13136,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12985,6 +13151,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_u() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13424,6 +13591,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_7() {
define void @s_shuffle_v4i32_v4i32__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13438,6 +13606,7 @@ define void @s_shuffle_v4i32_v4i32__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13452,6 +13621,7 @@ define void @s_shuffle_v4i32_v4i32__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13645,6 +13815,7 @@ define void @s_shuffle_v4i32_v4i32__3_0_0_0() {
define void @s_shuffle_v4i32_v4i32__4_0_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__4_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13659,6 +13830,7 @@ define void @s_shuffle_v4i32_v4i32__4_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__4_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13673,6 +13845,7 @@ define void @s_shuffle_v4i32_v4i32__4_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__4_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13876,14 +14049,15 @@ define void @s_shuffle_v4i32_v4i32__7_0_0_0() {
define void @s_shuffle_v4i32_v4i32__7_u_0_0() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -13893,14 +14067,15 @@ define void @s_shuffle_v4i32_v4i32__7_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -13910,6 +14085,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14358,15 +14534,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_0() {
define void @s_shuffle_v4i32_v4i32__7_7_u_0() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -14375,15 +14552,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -14392,6 +14570,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_0() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15070,14 +15249,15 @@ define void @s_shuffle_v4i32_v4i32__7_1_1_1() {
define void @s_shuffle_v4i32_v4i32__7_u_1_1() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -15087,14 +15267,15 @@ define void @s_shuffle_v4i32_v4i32__7_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -15104,6 +15285,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15552,15 +15734,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_1() {
define void @s_shuffle_v4i32_v4i32__7_7_u_1() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -15569,15 +15752,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -15586,6 +15770,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_1() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -16725,15 +16910,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_2() {
define void @s_shuffle_v4i32_v4i32__7_7_u_2() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -16742,15 +16928,16 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -16759,6 +16946,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_2() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18358,6 +18546,7 @@ define void @s_shuffle_v4i32_v4i32__0_4_4_4() {
define void @s_shuffle_v4i32_v4i32__1_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__1_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18370,6 +18559,7 @@ define void @s_shuffle_v4i32_v4i32__1_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__1_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18382,6 +18572,7 @@ define void @s_shuffle_v4i32_v4i32__1_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18400,6 +18591,7 @@ define void @s_shuffle_v4i32_v4i32__1_4_4_4() {
define void @s_shuffle_v4i32_v4i32__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18412,6 +18604,7 @@ define void @s_shuffle_v4i32_v4i32__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18424,6 +18617,7 @@ define void @s_shuffle_v4i32_v4i32__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18442,6 +18636,7 @@ define void @s_shuffle_v4i32_v4i32__2_4_4_4() {
define void @s_shuffle_v4i32_v4i32__3_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18454,6 +18649,7 @@ define void @s_shuffle_v4i32_v4i32__3_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18466,6 +18662,7 @@ define void @s_shuffle_v4i32_v4i32__3_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18654,6 +18851,7 @@ define void @s_shuffle_v4i32_v4i32__7_4_4_4() {
define void @s_shuffle_v4i32_v4i32__7_u_4_4() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18668,6 +18866,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18682,6 +18881,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19100,6 +19300,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_4() {
define void @s_shuffle_v4i32_v4i32__7_7_u_4() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19114,6 +19315,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19128,6 +19330,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_4() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19829,6 +20032,7 @@ define void @s_shuffle_v4i32_v4i32__7_5_5_5() {
define void @s_shuffle_v4i32_v4i32__7_u_5_5() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19843,6 +20047,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_5_5() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19857,6 +20062,7 @@ define void @s_shuffle_v4i32_v4i32__7_u_5_5() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -20275,6 +20481,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_5() {
define void @s_shuffle_v4i32_v4i32__7_7_u_5() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -20289,6 +20496,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -20303,6 +20511,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_5() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -21349,6 +21558,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_6() {
define void @s_shuffle_v4i32_v4i32__7_7_u_6() {
; GFX900-LABEL: s_shuffle_v4i32_v4i32__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -21363,6 +21573,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_6() {
;
; GFX90A-LABEL: s_shuffle_v4i32_v4i32__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -21377,6 +21588,7 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_6() {
;
; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
index ac7d9557ce765..1903d1d833ac9 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v4i64_v2i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i64_v2i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v4i64_v2i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i64_v2i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -433,18 +441,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -452,18 +461,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -471,18 +481,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -551,11 +562,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -565,11 +579,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -579,11 +596,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -844,11 +864,14 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -858,11 +881,14 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,11 +898,14 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -995,11 +1024,14 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1009,11 +1041,14 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1023,11 +1058,14 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1124,6 +1162,7 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -1144,6 +1183,7 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -1165,6 +1205,7 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -1399,14 +1440,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1419,14 +1460,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1436,18 +1477,17 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1871,10 +1911,11 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1891,10 +1932,11 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1911,10 +1953,11 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -2374,40 +2417,44 @@ define void @v_shuffle_v4i64_v2i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i64_v2i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2487,54 +2534,61 @@ define void @v_shuffle_v4i64_v2i64__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: v_mov_b32_e32 v6, v0
; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: v_mov_b32_e32 v6, v0
; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: v_mov_b32_e32 v6, v0
; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2760,13 +2814,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2776,13 +2831,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2792,13 +2848,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3177,11 +3234,14 @@ define void @v_shuffle_v4i64_v2i64__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3191,11 +3251,14 @@ define void @v_shuffle_v4i64_v2i64__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3205,11 +3268,14 @@ define void @v_shuffle_v4i64_v2i64__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3704,6 +3770,7 @@ define void @s_shuffle_v4i64_v2i64__0_u_u_u() {
define void @s_shuffle_v4i64_v2i64__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3717,6 +3784,7 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3730,6 +3798,7 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3763,6 +3832,7 @@ define void @s_shuffle_v4i64_v2i64__2_u_u_u() {
define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3776,6 +3846,7 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3789,6 +3860,7 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3809,15 +3881,16 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
define void @s_shuffle_v4i64_v2i64__3_0_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[16:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -3827,15 +3900,16 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[16:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -3845,6 +3919,7 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3925,6 +4000,7 @@ define void @s_shuffle_v4i64_v2i64__3_1_u_u() {
define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3940,6 +4016,7 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3955,6 +4032,7 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5415,6 +5493,7 @@ define void @s_shuffle_v4i64_v2i64__0_2_2_2() {
define void @s_shuffle_v4i64_v2i64__1_2_2_2() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5428,6 +5507,7 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5441,6 +5521,7 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5636,6 +5717,7 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_2() {
define void @s_shuffle_v4i64_v2i64__3_3_u_2() {
; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5653,6 +5735,7 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5670,6 +5753,7 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() {
;
; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
index 8dd4a40d00680..1840680f89805 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
@@ -18,37 +18,47 @@ define void @v_shuffle_v4i64_v3i64__u_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i64_v3i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -103,6 +113,7 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -116,6 +127,7 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -129,6 +141,7 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -199,6 +212,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -212,6 +226,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -225,6 +240,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -682,6 +698,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -701,6 +718,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -720,6 +738,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -847,6 +866,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -864,6 +884,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -881,6 +902,7 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -1280,7 +1302,10 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,7 +1319,10 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1308,7 +1336,10 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1493,7 +1524,10 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1507,7 +1541,10 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1521,7 +1558,10 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1680,6 +1720,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1700,6 +1741,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1721,6 +1763,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -2088,12 +2131,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2107,12 +2152,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2127,12 +2174,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2812,9 +2861,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2831,9 +2881,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2851,9 +2902,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3907,14 +3959,15 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3926,14 +3979,15 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3945,14 +3999,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4630,37 +4686,47 @@ define void @v_shuffle_v4i64_v3i64__u_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4i64_v3i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4715,6 +4781,7 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4728,6 +4795,7 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4741,6 +4809,7 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -4894,6 +4963,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4911,6 +4981,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4928,6 +4999,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -5270,14 +5342,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5287,14 +5359,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5304,14 +5376,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5978,6 +6050,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -5995,6 +6068,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -6012,6 +6086,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -7016,9 +7091,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -7032,9 +7108,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -7048,9 +7125,10 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7792,6 +7870,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7805,6 +7884,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7814,6 +7894,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7835,8 +7916,11 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -7846,8 +7930,11 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -7855,6 +7942,7 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7892,6 +7980,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7905,6 +7994,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7914,6 +8004,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7936,8 +8027,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -7947,8 +8041,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -7956,6 +8053,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7981,10 +8079,11 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -7999,10 +8098,11 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -8014,12 +8114,15 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8088,13 +8191,16 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8104,13 +8210,16 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8120,13 +8229,14 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -8143,12 +8253,15 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8156,12 +8269,15 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8169,6 +8285,7 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8213,12 +8330,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8228,12 +8346,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8241,6 +8360,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8268,12 +8388,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -8288,12 +8409,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -8305,15 +8427,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -8335,12 +8458,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8355,12 +8479,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8372,15 +8497,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -8458,14 +8584,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8473,14 +8602,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8488,6 +8620,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8512,16 +8645,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8529,16 +8663,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8546,6 +8681,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8945,6 +9081,7 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -8962,6 +9099,7 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -8975,6 +9113,7 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -9147,6 +9286,7 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9164,6 +9304,7 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9177,6 +9318,7 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -9349,10 +9491,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
@@ -9369,10 +9512,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
@@ -9386,12 +9530,15 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
@@ -9765,12 +9912,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -9785,12 +9933,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -9802,15 +9951,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -10357,10 +10507,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -10377,10 +10528,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -10394,12 +10546,15 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
@@ -10769,12 +10924,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10789,12 +10945,13 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10806,15 +10963,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -11734,17 +11892,18 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s24
+; GFX900-NEXT: s_mov_b32 s11, s25
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -11754,17 +11913,18 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s24
+; GFX90A-NEXT: s_mov_b32 s11, s25
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -11774,15 +11934,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s4
; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -12143,6 +12304,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -12156,6 +12318,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -12165,6 +12328,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12186,8 +12350,11 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12197,8 +12364,11 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12206,6 +12376,7 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12363,14 +12534,17 @@ define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12378,14 +12552,17 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12393,6 +12570,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12744,16 +12922,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12761,16 +12940,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12778,6 +12958,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -13368,14 +13549,17 @@ define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13383,14 +13567,17 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13398,6 +13585,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -13739,14 +13927,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13754,14 +13945,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13769,6 +13963,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -14630,14 +14825,15 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14647,14 +14843,15 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14662,6 +14859,7 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
;
; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
index ea9ef2f1ac94a..8118501d7ca79 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
@@ -142,6 +142,7 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -155,6 +156,7 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -168,6 +170,7 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -278,6 +281,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -291,6 +295,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -304,6 +309,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -918,16 +924,17 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -940,6 +947,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -959,6 +967,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1135,6 +1144,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1151,6 +1161,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1167,6 +1178,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1685,7 +1697,10 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1699,7 +1714,10 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1713,7 +1731,10 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1949,7 +1970,10 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1963,7 +1987,10 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,7 +2004,10 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2201,6 +2231,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
@@ -2221,6 +2252,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
@@ -2242,6 +2274,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
@@ -2736,16 +2769,17 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v0
+; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2758,12 +2792,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: v_mov_b32_e32 v6, v8
; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2778,12 +2813,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: v_mov_b32_e32 v6, v8
; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3706,9 +3742,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3725,9 +3762,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3745,9 +3783,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5184,6 +5223,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
@@ -5203,6 +5243,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: v_mov_b32_e32 v6, v4
; GFX90A-NEXT: v_mov_b32_e32 v7, v5
@@ -5219,6 +5260,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
@@ -6654,6 +6696,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
@@ -6673,6 +6716,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
@@ -6692,6 +6736,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
@@ -7758,6 +7803,7 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -7771,6 +7817,7 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -7784,6 +7831,7 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7989,6 +8037,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -8006,6 +8055,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -8023,6 +8073,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -8484,12 +8535,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -8500,12 +8553,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -8516,12 +8571,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9424,9 +9481,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -9440,9 +9498,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -9456,9 +9515,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10855,9 +10915,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -10871,9 +10932,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -10887,9 +10949,10 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12247,6 +12310,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
@@ -12263,6 +12327,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
@@ -12279,6 +12344,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
@@ -13263,6 +13329,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13276,6 +13343,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13285,6 +13353,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13306,8 +13375,11 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13317,8 +13389,11 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13326,6 +13401,7 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13347,10 +13423,11 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13360,10 +13437,11 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13371,6 +13449,7 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13408,6 +13487,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13421,6 +13501,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13430,6 +13511,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13452,8 +13534,11 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13463,8 +13548,11 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13472,6 +13560,7 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13494,10 +13583,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13507,10 +13597,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13518,6 +13609,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13543,10 +13635,11 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -13561,10 +13654,11 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -13576,14 +13670,14 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -13657,15 +13751,16 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13675,15 +13770,16 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13693,13 +13789,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -13718,13 +13815,16 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13734,13 +13834,16 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13750,13 +13853,14 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s6
; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -13773,14 +13877,15 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13788,14 +13893,15 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13803,6 +13909,7 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13847,12 +13954,13 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13862,12 +13970,13 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13875,6 +13984,7 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13899,10 +14009,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13912,10 +14025,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13923,6 +14039,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13950,12 +14067,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -13970,12 +14088,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -13987,14 +14106,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -14015,12 +14137,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -14035,12 +14158,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -14052,14 +14176,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -14134,15 +14261,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s26
+; GFX900-NEXT: s_mov_b32 s11, s27
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14152,15 +14282,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s26
+; GFX90A-NEXT: s_mov_b32 s11, s27
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14170,15 +14303,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -14195,14 +14329,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14210,14 +14347,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14225,6 +14365,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14249,14 +14390,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14264,14 +14408,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14279,6 +14426,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14327,14 +14475,15 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14344,14 +14493,15 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14359,6 +14509,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14885,6 +15036,7 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -14902,6 +15054,7 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -14915,6 +15068,7 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15150,6 +15304,7 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -15167,6 +15322,7 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -15180,6 +15336,7 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15425,10 +15582,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
@@ -15445,10 +15603,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
@@ -15462,14 +15621,15 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
@@ -15991,12 +16151,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -16011,12 +16172,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -16028,14 +16190,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -16803,10 +16968,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -16823,10 +16989,11 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -16840,14 +17007,15 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
@@ -17365,12 +17533,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17385,12 +17554,13 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17402,14 +17572,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -18688,15 +18861,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s26
+; GFX900-NEXT: s_mov_b32 s11, s27
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -18706,15 +18882,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s26
+; GFX90A-NEXT: s_mov_b32 s11, s27
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -18724,15 +18903,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
; GFX942-NEXT: s_mov_b32 s14, s4
; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -20509,6 +20689,7 @@ define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -20522,6 +20703,7 @@ define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -20531,6 +20713,7 @@ define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20552,8 +20735,11 @@ define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20563,8 +20749,11 @@ define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20572,6 +20761,7 @@ define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20593,10 +20783,11 @@ define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20606,10 +20797,11 @@ define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20617,6 +20809,7 @@ define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20838,16 +21031,17 @@ define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20855,16 +21049,17 @@ define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20872,6 +21067,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -21367,14 +21563,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -21382,14 +21581,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -21397,6 +21599,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -22199,16 +22402,17 @@ define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22216,16 +22420,17 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22233,6 +22438,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -22708,14 +22914,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22723,14 +22932,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22738,6 +22950,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -23932,14 +24145,15 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23949,14 +24163,15 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23964,6 +24179,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() {
;
; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
index b30af835a7882..0ef978a889880 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
@@ -57,40 +57,44 @@ define void @v_shuffle_v4p0_v2p0__0_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p0_v2p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -113,40 +117,44 @@ define void @v_shuffle_v4p0_v2p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p0_v2p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -433,18 +441,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -452,18 +461,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -471,18 +481,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -551,11 +562,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -565,11 +579,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -579,11 +596,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -844,11 +864,14 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -858,11 +881,14 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,11 +898,14 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -995,11 +1024,14 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1009,11 +1041,14 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1023,11 +1058,14 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1124,6 +1162,7 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -1144,6 +1183,7 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -1165,6 +1205,7 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -1399,14 +1440,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1419,14 +1460,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1436,18 +1477,17 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1871,10 +1911,11 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1891,10 +1932,11 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1911,10 +1953,11 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -2374,40 +2417,44 @@ define void @v_shuffle_v4p0_v2p0__0_2_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p0_v2p0__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2487,54 +2534,61 @@ define void @v_shuffle_v4p0_v2p0__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: v_mov_b32_e32 v6, v0
; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: v_mov_b32_e32 v6, v0
; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: v_mov_b32_e32 v6, v0
; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2760,13 +2814,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2776,13 +2831,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2792,13 +2848,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3177,11 +3234,14 @@ define void @v_shuffle_v4p0_v2p0__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3191,11 +3251,14 @@ define void @v_shuffle_v4p0_v2p0__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3205,11 +3268,14 @@ define void @v_shuffle_v4p0_v2p0__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3704,6 +3770,7 @@ define void @s_shuffle_v4p0_v2p0__0_u_u_u() {
define void @s_shuffle_v4p0_v2p0__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3717,6 +3784,7 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3730,6 +3798,7 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3763,6 +3832,7 @@ define void @s_shuffle_v4p0_v2p0__2_u_u_u() {
define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3776,6 +3846,7 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3789,6 +3860,7 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3809,15 +3881,16 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
define void @s_shuffle_v4p0_v2p0__3_0_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[16:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -3827,15 +3900,16 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[16:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -3845,6 +3919,7 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -3925,6 +4000,7 @@ define void @s_shuffle_v4p0_v2p0__3_1_u_u() {
define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -3940,6 +4016,7 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -3955,6 +4032,7 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5415,6 +5493,7 @@ define void @s_shuffle_v4p0_v2p0__0_2_2_2() {
define void @s_shuffle_v4p0_v2p0__1_2_2_2() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5428,6 +5507,7 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5441,6 +5521,7 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -5636,6 +5717,7 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_2() {
define void @s_shuffle_v4p0_v2p0__3_3_u_2() {
; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -5653,6 +5735,7 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -5670,6 +5753,7 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() {
;
; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
index e6ac554735eee..dfdb2fbbe19e1 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
@@ -18,37 +18,47 @@ define void @v_shuffle_v4p0_v3p0__u_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p0_v3p0__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__0_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__0_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -103,6 +113,7 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -116,6 +127,7 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -129,6 +141,7 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -199,6 +212,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -212,6 +226,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -225,6 +240,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -682,6 +698,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -701,6 +718,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -720,6 +738,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -847,6 +866,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -864,6 +884,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -881,6 +902,7 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -1280,7 +1302,10 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1294,7 +1319,10 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1308,7 +1336,10 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1493,7 +1524,10 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1507,7 +1541,10 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1521,7 +1558,10 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1680,6 +1720,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1700,6 +1741,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1721,6 +1763,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -2088,12 +2131,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2107,12 +2152,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2127,12 +2174,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2812,9 +2861,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2831,9 +2881,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2851,9 +2902,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3907,14 +3959,15 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3926,14 +3979,15 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3945,14 +3999,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4630,37 +4686,47 @@ define void @v_shuffle_v4p0_v3p0__u_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p0_v3p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__0_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__0_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4715,6 +4781,7 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4728,6 +4795,7 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4741,6 +4809,7 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -4894,6 +4963,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -4911,6 +4981,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -4928,6 +4999,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -5270,14 +5342,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5287,14 +5359,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5304,14 +5376,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5978,6 +6050,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
@@ -5995,6 +6068,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
@@ -6012,6 +6086,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
@@ -7016,9 +7091,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -7032,9 +7108,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -7048,9 +7125,10 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7792,6 +7870,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7805,6 +7884,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7814,6 +7894,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7835,8 +7916,11 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -7846,8 +7930,11 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -7855,6 +7942,7 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7892,6 +7980,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -7905,6 +7994,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -7914,6 +8004,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7936,8 +8027,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -7947,8 +8041,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -7956,6 +8053,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -7981,10 +8079,11 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -7999,10 +8098,11 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -8014,12 +8114,15 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8088,13 +8191,16 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8104,13 +8210,16 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8120,13 +8229,14 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -8143,12 +8253,15 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8156,12 +8269,15 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8169,6 +8285,7 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8213,12 +8330,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8228,12 +8346,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8241,6 +8360,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8268,12 +8388,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -8288,12 +8409,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -8305,15 +8427,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -8335,12 +8458,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -8355,12 +8479,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -8372,15 +8497,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -8458,14 +8584,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8473,14 +8602,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8488,6 +8620,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8512,16 +8645,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8529,16 +8663,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8546,6 +8681,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -8945,6 +9081,7 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -8962,6 +9099,7 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -8975,6 +9113,7 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -9147,6 +9286,7 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -9164,6 +9304,7 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -9177,6 +9318,7 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -9349,10 +9491,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
@@ -9369,10 +9512,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
@@ -9386,12 +9530,15 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
@@ -9765,12 +9912,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -9785,12 +9933,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -9802,15 +9951,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -10357,10 +10507,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -10377,10 +10528,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -10394,12 +10546,15 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
@@ -10769,12 +10924,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10789,12 +10945,13 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10806,15 +10963,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
@@ -11734,17 +11892,18 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[20:25]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s24
+; GFX900-NEXT: s_mov_b32 s9, s25
+; GFX900-NEXT: s_mov_b32 s10, s24
+; GFX900-NEXT: s_mov_b32 s11, s25
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -11754,17 +11913,18 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[20:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s24
+; GFX90A-NEXT: s_mov_b32 s9, s25
+; GFX90A-NEXT: s_mov_b32 s10, s24
+; GFX90A-NEXT: s_mov_b32 s11, s25
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -11774,15 +11934,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s8, s16
+; GFX942-NEXT: s_mov_b32 s9, s17
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: s_mov_b32 s14, s4
; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -12143,6 +12304,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -12156,6 +12318,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -12165,6 +12328,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12186,8 +12350,11 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12197,8 +12364,11 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12206,6 +12376,7 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12363,14 +12534,17 @@ define void @s_shuffle_v4p0_v3p0__5_3_3_3() {
define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12378,14 +12552,17 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12393,6 +12570,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -12744,16 +12922,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_3() {
define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -12761,16 +12940,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -12778,6 +12958,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -13368,14 +13549,17 @@ define void @s_shuffle_v4p0_v3p0__5_4_4_4() {
define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13383,14 +13567,17 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13398,6 +13585,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -13739,14 +13927,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_4() {
define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s20
+; GFX900-NEXT: s_mov_b32 s9, s21
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13754,14 +13945,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s20
+; GFX90A-NEXT: s_mov_b32 s9, s21
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13769,6 +13963,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
@@ -14630,14 +14825,15 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14647,14 +14843,15 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14662,6 +14859,7 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
;
; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
index ce1c54129f706..ae949dc1cb076 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
@@ -142,6 +142,7 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -155,6 +156,7 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -168,6 +170,7 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -278,6 +281,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -291,6 +295,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -304,6 +309,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -918,16 +924,17 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -940,6 +947,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -959,6 +967,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1135,6 +1144,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -1151,6 +1161,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -1167,6 +1178,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -1685,7 +1697,10 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1699,7 +1714,10 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1713,7 +1731,10 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1949,7 +1970,10 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1963,7 +1987,10 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,7 +2004,10 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2201,6 +2231,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v8
; GFX900-NEXT: v_mov_b32_e32 v1, v9
@@ -2221,6 +2252,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v1, v9
@@ -2242,6 +2274,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
@@ -2736,16 +2769,17 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v0
+; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2758,12 +2792,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
; GFX90A-NEXT: v_mov_b32_e32 v6, v8
; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2778,12 +2813,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: v_mov_b32_e32 v6, v8
; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3706,9 +3742,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v10
; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3725,9 +3762,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v10
; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3745,9 +3783,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5184,6 +5223,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
@@ -5203,6 +5243,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: v_mov_b32_e32 v6, v4
; GFX90A-NEXT: v_mov_b32_e32 v7, v5
@@ -5219,6 +5260,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
@@ -6654,6 +6696,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
@@ -6673,6 +6716,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
@@ -6692,6 +6736,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
@@ -7758,6 +7803,7 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -7771,6 +7817,7 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -7784,6 +7831,7 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -7989,6 +8037,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
@@ -8006,6 +8055,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
@@ -8023,6 +8073,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
@@ -8484,12 +8535,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -8500,12 +8553,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -8516,12 +8571,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9424,9 +9481,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -9440,9 +9498,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -9456,9 +9515,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10855,9 +10915,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -10871,9 +10932,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -10887,9 +10949,10 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12247,6 +12310,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
@@ -12263,6 +12327,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
@@ -12279,6 +12344,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
@@ -13263,6 +13329,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13276,6 +13343,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13285,6 +13353,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13306,8 +13375,11 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13317,8 +13389,11 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13326,6 +13401,7 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13347,10 +13423,11 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13360,10 +13437,11 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13371,6 +13449,7 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13408,6 +13487,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13421,6 +13501,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13430,6 +13511,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13452,8 +13534,11 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13463,8 +13548,11 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13472,6 +13560,7 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13494,10 +13583,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13507,10 +13597,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13518,6 +13609,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13543,10 +13635,11 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -13561,10 +13654,11 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -13576,14 +13670,14 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -13657,15 +13751,16 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13675,15 +13770,16 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13693,13 +13789,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -13718,13 +13815,16 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13734,13 +13834,16 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13750,13 +13853,14 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s6
; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -13773,14 +13877,15 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13788,14 +13893,15 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13803,6 +13909,7 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13847,12 +13954,13 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13862,12 +13970,13 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13875,6 +13984,7 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13899,10 +14009,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -13912,10 +14025,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -13923,6 +14039,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -13950,12 +14067,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -13970,12 +14088,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -13987,14 +14106,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -14015,12 +14137,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -14035,12 +14158,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -14052,14 +14176,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -14134,15 +14261,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s26
+; GFX900-NEXT: s_mov_b32 s11, s27
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14152,15 +14282,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s26
+; GFX90A-NEXT: s_mov_b32 s11, s27
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14170,15 +14303,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -14195,14 +14329,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14210,14 +14347,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14225,6 +14365,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14249,14 +14390,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14264,14 +14408,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14279,6 +14426,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14327,14 +14475,15 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -14344,14 +14493,15 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -14359,6 +14509,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -14885,6 +15036,7 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -14902,6 +15054,7 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -14915,6 +15068,7 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15150,6 +15304,7 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
@@ -15167,6 +15322,7 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
@@ -15180,6 +15336,7 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -15425,10 +15582,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
@@ -15445,10 +15603,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
@@ -15462,14 +15621,15 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
@@ -15991,12 +16151,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -16011,12 +16172,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -16028,14 +16190,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -16803,10 +16968,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -16823,10 +16989,11 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -16840,14 +17007,15 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
@@ -17365,12 +17533,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17385,12 +17554,13 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17402,14 +17572,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -18688,15 +18861,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[20:27]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s26
+; GFX900-NEXT: s_mov_b32 s9, s27
+; GFX900-NEXT: s_mov_b32 s10, s26
+; GFX900-NEXT: s_mov_b32 s11, s27
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -18706,15 +18882,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[20:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s26
+; GFX90A-NEXT: s_mov_b32 s9, s27
+; GFX90A-NEXT: s_mov_b32 s10, s26
+; GFX90A-NEXT: s_mov_b32 s11, s27
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -18724,15 +18903,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s18
+; GFX942-NEXT: s_mov_b32 s11, s19
; GFX942-NEXT: s_mov_b32 s14, s4
; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
@@ -20509,6 +20689,7 @@ define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -20522,6 +20703,7 @@ define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -20531,6 +20713,7 @@ define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20552,8 +20735,11 @@ define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20563,8 +20749,11 @@ define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20572,6 +20761,7 @@ define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20593,10 +20783,11 @@ define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20606,10 +20797,11 @@ define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20617,6 +20809,7 @@ define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -20838,16 +21031,17 @@ define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -20855,16 +21049,17 @@ define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -20872,6 +21067,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -21367,14 +21563,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -21382,14 +21581,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -21397,6 +21599,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -22199,16 +22402,17 @@ define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22216,16 +22420,17 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22233,6 +22438,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -22708,14 +22914,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22723,14 +22932,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22738,6 +22950,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -23932,14 +24145,15 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23949,14 +24163,15 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23964,6 +24179,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() {
;
; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
index 3b5690562c38a..0905f20a8d078 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4p3_v2p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v2p3__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -112,35 +116,39 @@ define void @v_shuffle_v4p3_v2p3__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v2p3__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -271,28 +279,30 @@ define void @v_shuffle_v4p3_v2p3__3_2_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v2p3__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2068,35 +2078,39 @@ define void @v_shuffle_v4p3_v2p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v2p3__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:1]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:1]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2371,43 +2385,47 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_2(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v2p3__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:3]
+; GFX942-NEXT: ; def v[4:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3201,6 +3219,7 @@ define void @s_shuffle_v4p3_v2p3__0_u_u_u() {
define void @s_shuffle_v4p3_v2p3__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3213,6 +3232,7 @@ define void @s_shuffle_v4p3_v2p3__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3225,6 +3245,7 @@ define void @s_shuffle_v4p3_v2p3__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3257,6 +3278,7 @@ define void @s_shuffle_v4p3_v2p3__2_u_u_u() {
define void @s_shuffle_v4p3_v2p3__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3269,6 +3291,7 @@ define void @s_shuffle_v4p3_v2p3__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3281,6 +3304,7 @@ define void @s_shuffle_v4p3_v2p3__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3300,6 +3324,7 @@ define void @s_shuffle_v4p3_v2p3__3_u_u_u() {
define void @s_shuffle_v4p3_v2p3__3_0_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__3_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3316,6 +3341,7 @@ define void @s_shuffle_v4p3_v2p3__3_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__3_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3332,6 +3358,7 @@ define void @s_shuffle_v4p3_v2p3__3_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -3407,6 +3434,7 @@ define void @s_shuffle_v4p3_v2p3__3_1_u_u() {
define void @s_shuffle_v4p3_v2p3__3_2_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__3_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -3420,6 +3448,7 @@ define void @s_shuffle_v4p3_v2p3__3_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__3_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -3433,6 +3462,7 @@ define void @s_shuffle_v4p3_v2p3__3_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4731,6 +4761,7 @@ define void @s_shuffle_v4p3_v2p3__0_2_2_2() {
define void @s_shuffle_v4p3_v2p3__1_2_2_2() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__1_2_2_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4743,6 +4774,7 @@ define void @s_shuffle_v4p3_v2p3__1_2_2_2() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4755,6 +4787,7 @@ define void @s_shuffle_v4p3_v2p3__1_2_2_2() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
@@ -4930,6 +4963,7 @@ define void @s_shuffle_v4p3_v2p3__3_3_2_2() {
define void @s_shuffle_v4p3_v2p3__3_3_u_2() {
; GFX900-LABEL: s_shuffle_v4p3_v2p3__3_3_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
@@ -4944,6 +4978,7 @@ define void @s_shuffle_v4p3_v2p3__3_3_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v2p3__3_3_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
@@ -4958,6 +4993,7 @@ define void @s_shuffle_v4p3_v2p3__3_3_u_2() {
;
; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
index 8039e126590b9..6453be2e7a548 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
@@ -59,35 +59,39 @@ define void @v_shuffle_v4p3_v3p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -101,11 +105,12 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -113,11 +118,12 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -125,11 +131,12 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -154,35 +161,39 @@ define void @v_shuffle_v4p3_v3p3__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -197,11 +208,12 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -209,11 +221,12 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -221,11 +234,12 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -255,15 +269,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -271,15 +286,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -347,15 +363,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -363,15 +380,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -379,15 +397,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -412,28 +431,30 @@ define void @v_shuffle_v4p3_v3p3__5_3_u_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -491,12 +512,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -504,12 +526,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -517,12 +540,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -537,15 +561,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -553,15 +579,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -569,16 +597,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -609,16 +638,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,17 +656,18 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -705,41 +736,47 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -765,29 +802,32 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_u(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1179,29 +1219,32 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1374,29 +1417,32 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1529,16 +1575,17 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1546,16 +1593,17 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,16 +1611,17 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1884,16 +1933,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1901,16 +1951,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1918,16 +1969,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2545,16 +2597,17 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2562,17 +2615,18 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2876,16 +2930,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v7
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2893,15 +2948,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2909,16 +2966,18 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3268,29 +3327,31 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3416,8 +3477,9 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -3433,6 +3495,7 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -3871,16 +3934,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3888,16 +3952,17 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v8
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3986,14 +4051,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
@@ -4004,14 +4070,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v6
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
@@ -4108,8 +4175,9 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
@@ -4126,6 +4194,7 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
@@ -4200,35 +4269,39 @@ define void @v_shuffle_v4p3_v3p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4242,11 +4315,12 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4254,11 +4328,12 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4266,11 +4341,12 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4395,36 +4471,39 @@ define void @v_shuffle_v4p3_v3p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v0, v6
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
@@ -4432,7 +4511,7 @@ define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4726,43 +4805,47 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_3(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v3p3__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5375,29 +5458,32 @@ define void @v_shuffle_v4p3_v3p3__5_u_4_4(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5684,40 +5770,45 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v5
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6083,8 +6174,9 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
@@ -6101,6 +6193,7 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
@@ -6237,29 +6330,31 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6605,13 +6700,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6619,13 +6715,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6721,9 +6818,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6739,9 +6838,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v1, v4
; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -6877,6 +6978,7 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
@@ -6891,6 +6993,7 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) {
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
@@ -6966,6 +7069,7 @@ define void @s_shuffle_v4p3_v3p3__0_u_u_u() {
define void @s_shuffle_v4p3_v3p3__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -6978,6 +7082,7 @@ define void @s_shuffle_v4p3_v3p3__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -6990,6 +7095,7 @@ define void @s_shuffle_v4p3_v3p3__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7008,6 +7114,7 @@ define void @s_shuffle_v4p3_v3p3__1_u_u_u() {
define void @s_shuffle_v4p3_v3p3__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7020,6 +7127,7 @@ define void @s_shuffle_v4p3_v3p3__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7032,6 +7140,7 @@ define void @s_shuffle_v4p3_v3p3__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7064,6 +7173,7 @@ define void @s_shuffle_v4p3_v3p3__3_u_u_u() {
define void @s_shuffle_v4p3_v3p3__4_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__4_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7076,6 +7186,7 @@ define void @s_shuffle_v4p3_v3p3__4_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__4_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7088,6 +7199,7 @@ define void @s_shuffle_v4p3_v3p3__4_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7107,6 +7219,7 @@ define void @s_shuffle_v4p3_v3p3__4_u_u_u() {
define void @s_shuffle_v4p3_v3p3__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7119,6 +7232,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7131,6 +7245,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7150,14 +7265,15 @@ define void @s_shuffle_v4p3_v3p3__5_u_u_u() {
define void @s_shuffle_v4p3_v3p3__5_0_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7166,14 +7282,15 @@ define void @s_shuffle_v4p3_v3p3__5_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7182,6 +7299,7 @@ define void @s_shuffle_v4p3_v3p3__5_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7257,14 +7375,15 @@ define void @s_shuffle_v4p3_v3p3__5_1_u_u() {
define void @s_shuffle_v4p3_v3p3__5_2_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7273,14 +7392,15 @@ define void @s_shuffle_v4p3_v3p3__5_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7289,6 +7409,7 @@ define void @s_shuffle_v4p3_v3p3__5_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7312,6 +7433,7 @@ define void @s_shuffle_v4p3_v3p3__5_2_u_u() {
define void @s_shuffle_v4p3_v3p3__5_3_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7325,6 +7447,7 @@ define void @s_shuffle_v4p3_v3p3__5_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7338,6 +7461,7 @@ define void @s_shuffle_v4p3_v3p3__5_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7377,6 +7501,7 @@ define void @s_shuffle_v4p3_v3p3__5_4_u_u() {
define void @s_shuffle_v4p3_v3p3__5_5_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7390,6 +7515,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7403,6 +7529,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7423,15 +7550,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_u() {
define void @s_shuffle_v4p3_v3p3__5_5_0_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7440,15 +7568,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7457,6 +7586,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7481,15 +7611,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_u() {
define void @s_shuffle_v4p3_v3p3__5_5_1_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -7498,15 +7629,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -7515,6 +7647,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7594,6 +7727,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_u() {
define void @s_shuffle_v4p3_v3p3__5_5_3_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7608,6 +7742,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7622,6 +7757,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -7643,6 +7779,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_u() {
define void @s_shuffle_v4p3_v3p3__5_5_4_u() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -7657,6 +7794,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -7671,6 +7809,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8011,6 +8150,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_5() {
define void @s_shuffle_v4p3_v3p3__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8025,6 +8165,7 @@ define void @s_shuffle_v4p3_v3p3__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8039,6 +8180,7 @@ define void @s_shuffle_v4p3_v3p3__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8181,6 +8323,7 @@ define void @s_shuffle_v4p3_v3p3__2_0_0_0() {
define void @s_shuffle_v4p3_v3p3__3_0_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__3_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -8195,6 +8338,7 @@ define void @s_shuffle_v4p3_v3p3__3_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__3_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -8209,6 +8353,7 @@ define void @s_shuffle_v4p3_v3p3__3_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__3_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8351,14 +8496,15 @@ define void @s_shuffle_v4p3_v3p3__5_0_0_0() {
define void @s_shuffle_v4p3_v3p3__5_u_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -8368,14 +8514,15 @@ define void @s_shuffle_v4p3_v3p3__5_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -8385,6 +8532,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -8711,15 +8859,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_0() {
define void @s_shuffle_v4p3_v3p3__5_5_u_0() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -8728,15 +8877,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -8745,6 +8895,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9226,14 +9377,15 @@ define void @s_shuffle_v4p3_v3p3__5_1_1_1() {
define void @s_shuffle_v4p3_v3p3__5_u_1_1() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -9243,14 +9395,15 @@ define void @s_shuffle_v4p3_v3p3__5_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -9260,6 +9413,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -9586,15 +9740,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_1() {
define void @s_shuffle_v4p3_v3p3__5_5_u_1() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -9603,15 +9758,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -9620,6 +9776,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_1() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10446,15 +10603,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_2() {
define void @s_shuffle_v4p3_v3p3__5_5_u_2() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:10]
+; GFX900-NEXT: ; def s[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:6]
+; GFX900-NEXT: ; def s[12:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s10
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s14
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -10463,15 +10621,16 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:10]
+; GFX90A-NEXT: ; def s[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:6]
+; GFX90A-NEXT: ; def s[12:14]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s10
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s14
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -10480,6 +10639,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_2() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10802,6 +10962,7 @@ define void @s_shuffle_v4p3_v3p3__0_3_3_3() {
define void @s_shuffle_v4p3_v3p3__1_3_3_3() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__1_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10814,6 +10975,7 @@ define void @s_shuffle_v4p3_v3p3__1_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__1_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10826,6 +10988,7 @@ define void @s_shuffle_v4p3_v3p3__1_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -10844,6 +11007,7 @@ define void @s_shuffle_v4p3_v3p3__1_3_3_3() {
define void @s_shuffle_v4p3_v3p3__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__2_3_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -10856,6 +11020,7 @@ define void @s_shuffle_v4p3_v3p3__2_3_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__2_3_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -10868,6 +11033,7 @@ define void @s_shuffle_v4p3_v3p3__2_3_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_3_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11004,6 +11170,7 @@ define void @s_shuffle_v4p3_v3p3__5_3_3_3() {
define void @s_shuffle_v4p3_v3p3__5_u_3_3() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_u_3_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11018,6 +11185,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_3_3() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_u_3_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11032,6 +11200,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_3_3() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_3_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11337,6 +11506,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_3() {
define void @s_shuffle_v4p3_v3p3__5_5_u_3() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_3:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11351,6 +11521,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_3() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_3:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11365,6 +11536,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_3() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_3:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -11874,6 +12046,7 @@ define void @s_shuffle_v4p3_v3p3__5_4_4_4() {
define void @s_shuffle_v4p3_v3p3__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -11888,6 +12061,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -11902,6 +12076,7 @@ define void @s_shuffle_v4p3_v3p3__5_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12207,6 +12382,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_4() {
define void @s_shuffle_v4p3_v3p3__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -12221,6 +12397,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -12235,6 +12412,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
@@ -12989,6 +13167,7 @@ define void @s_shuffle_v4p3_v3p3__5_4_5_5() {
define void @s_shuffle_v4p3_v3p3__5_5_u_5() {
; GFX900-LABEL: s_shuffle_v4p3_v3p3__5_5_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:6]
@@ -13003,6 +13182,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v3p3__5_5_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:6]
@@ -13017,6 +13197,7 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_5() {
;
; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:2]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
index eeab42ae40d7f..c8ceae975e063 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
@@ -61,9 +61,10 @@ define void @v_shuffle_v4p3_v4p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -73,9 +74,10 @@ define void @v_shuffle_v4p3_v4p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -85,9 +87,10 @@ define void @v_shuffle_v4p3_v4p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -103,33 +106,37 @@ define void @v_shuffle_v4p3_v4p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -141,37 +148,41 @@ define void @v_shuffle_v4p3_v4p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -198,9 +209,10 @@ define void @v_shuffle_v4p3_v4p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -210,9 +222,10 @@ define void @v_shuffle_v4p3_v4p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -222,9 +235,10 @@ define void @v_shuffle_v4p3_v4p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -241,33 +255,37 @@ define void @v_shuffle_v4p3_v4p3__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -280,37 +298,41 @@ define void @v_shuffle_v4p3_v4p3__6_u_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -327,13 +349,14 @@ define void @v_shuffle_v4p3_v4p3__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -434,13 +457,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -484,49 +508,53 @@ define void @v_shuffle_v4p3_v4p3__7_2_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -539,14 +567,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,14 +655,15 @@ define void @v_shuffle_v4p3_v4p3__7_5_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,40 +700,44 @@ define void @v_shuffle_v4p3_v4p3__7_6_u_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -720,14 +754,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +772,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +790,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -779,14 +815,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -796,14 +833,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -813,15 +851,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -890,52 +928,56 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -948,41 +990,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -995,40 +1043,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1087,43 +1142,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_u(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1578,11 +1637,12 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1592,11 +1652,12 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1605,12 +1666,13 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1820,11 +1882,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
+; GFX900-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,11 +1897,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1848,11 +1912,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2050,14 +2115,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2067,14 +2133,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2084,15 +2151,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2529,14 +2596,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7_vgpr8
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,14 +2614,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v0
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2563,15 +2632,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v0
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3400,14 +3469,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3417,14 +3487,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v1
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3434,15 +3505,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3877,14 +3948,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3894,14 +3966,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v5
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3911,15 +3984,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v5
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5196,48 +5269,52 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; implicit-def: $vgpr7_vgpr8_vgpr9_vgpr10
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, v6
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: global_store_dwordx4 v11, v[7:10], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v7
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v11, v2
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v7
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v11, v2
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6974,9 +7051,10 @@ define void @v_shuffle_v4p3_v4p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6986,9 +7064,10 @@ define void @v_shuffle_v4p3_v4p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6998,9 +7077,10 @@ define void @v_shuffle_v4p3_v4p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7016,33 +7096,37 @@ define void @v_shuffle_v4p3_v4p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7054,37 +7138,41 @@ define void @v_shuffle_v4p3_v4p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7256,43 +7344,47 @@ define void @v_shuffle_v4p3_v4p3__7_4_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7694,41 +7786,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_4(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8548,43 +8646,47 @@ define void @v_shuffle_v4p3_v4p3__7_5_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v1
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v1
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8980,40 +9082,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_5(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10248,43 +10357,47 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_6(ptr addrspace(1) inreg %ptr) {
define void @v_shuffle_v4p3_v4p3__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v7, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v7, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v7, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11967,6 +12080,7 @@ define void @s_shuffle_v4p3_v4p3__0_u_u_u() {
define void @s_shuffle_v4p3_v4p3__1_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__1_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -11979,6 +12093,7 @@ define void @s_shuffle_v4p3_v4p3__1_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__1_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -11991,6 +12106,7 @@ define void @s_shuffle_v4p3_v4p3__1_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12009,6 +12125,7 @@ define void @s_shuffle_v4p3_v4p3__1_u_u_u() {
define void @s_shuffle_v4p3_v4p3__2_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__2_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12021,6 +12138,7 @@ define void @s_shuffle_v4p3_v4p3__2_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__2_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12033,6 +12151,7 @@ define void @s_shuffle_v4p3_v4p3__2_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12051,6 +12170,7 @@ define void @s_shuffle_v4p3_v4p3__2_u_u_u() {
define void @s_shuffle_v4p3_v4p3__3_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__3_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12063,6 +12183,7 @@ define void @s_shuffle_v4p3_v4p3__3_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__3_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12075,6 +12196,7 @@ define void @s_shuffle_v4p3_v4p3__3_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12107,6 +12229,7 @@ define void @s_shuffle_v4p3_v4p3__4_u_u_u() {
define void @s_shuffle_v4p3_v4p3__5_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__5_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12119,6 +12242,7 @@ define void @s_shuffle_v4p3_v4p3__5_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__5_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12131,6 +12255,7 @@ define void @s_shuffle_v4p3_v4p3__5_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12150,6 +12275,7 @@ define void @s_shuffle_v4p3_v4p3__5_u_u_u() {
define void @s_shuffle_v4p3_v4p3__6_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__6_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12162,6 +12288,7 @@ define void @s_shuffle_v4p3_v4p3__6_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__6_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12174,6 +12301,7 @@ define void @s_shuffle_v4p3_v4p3__6_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12193,6 +12321,7 @@ define void @s_shuffle_v4p3_v4p3__6_u_u_u() {
define void @s_shuffle_v4p3_v4p3__7_u_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_u_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12205,6 +12334,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_u_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12217,6 +12347,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12236,14 +12367,15 @@ define void @s_shuffle_v4p3_v4p3__7_u_u_u() {
define void @s_shuffle_v4p3_v4p3__7_0_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_0_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12252,14 +12384,15 @@ define void @s_shuffle_v4p3_v4p3__7_0_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_0_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12268,6 +12401,7 @@ define void @s_shuffle_v4p3_v4p3__7_0_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12343,14 +12477,15 @@ define void @s_shuffle_v4p3_v4p3__7_1_u_u() {
define void @s_shuffle_v4p3_v4p3__7_2_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_2_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12359,14 +12494,15 @@ define void @s_shuffle_v4p3_v4p3__7_2_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_2_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12375,6 +12511,7 @@ define void @s_shuffle_v4p3_v4p3__7_2_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12398,14 +12535,15 @@ define void @s_shuffle_v4p3_v4p3__7_2_u_u() {
define void @s_shuffle_v4p3_v4p3__7_3_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_3_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12414,14 +12552,15 @@ define void @s_shuffle_v4p3_v4p3__7_3_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_3_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12430,6 +12569,7 @@ define void @s_shuffle_v4p3_v4p3__7_3_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12453,6 +12593,7 @@ define void @s_shuffle_v4p3_v4p3__7_3_u_u() {
define void @s_shuffle_v4p3_v4p3__7_4_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_4_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12466,6 +12607,7 @@ define void @s_shuffle_v4p3_v4p3__7_4_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_4_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12479,6 +12621,7 @@ define void @s_shuffle_v4p3_v4p3__7_4_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12518,6 +12661,7 @@ define void @s_shuffle_v4p3_v4p3__7_5_u_u() {
define void @s_shuffle_v4p3_v4p3__7_6_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_6_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12531,6 +12675,7 @@ define void @s_shuffle_v4p3_v4p3__7_6_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_6_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12544,6 +12689,7 @@ define void @s_shuffle_v4p3_v4p3__7_6_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12564,6 +12710,7 @@ define void @s_shuffle_v4p3_v4p3__7_6_u_u() {
define void @s_shuffle_v4p3_v4p3__7_7_u_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12577,6 +12724,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12590,6 +12738,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12610,15 +12759,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_u() {
define void @s_shuffle_v4p3_v4p3__7_7_0_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_0_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12627,15 +12777,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_0_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12644,6 +12795,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12668,15 +12820,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_u() {
define void @s_shuffle_v4p3_v4p3__7_7_1_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_1_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12685,15 +12838,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_1_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12702,6 +12856,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12781,15 +12936,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_u() {
define void @s_shuffle_v4p3_v4p3__7_7_3_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_3_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -12798,15 +12954,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_3_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -12815,6 +12972,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12839,6 +12997,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_u() {
define void @s_shuffle_v4p3_v4p3__7_7_4_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_4_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12853,6 +13012,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_4_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12867,6 +13027,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12888,6 +13049,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_u() {
define void @s_shuffle_v4p3_v4p3__7_7_5_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_5_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12902,6 +13064,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_5_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12916,6 +13079,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -12957,6 +13121,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_u() {
define void @s_shuffle_v4p3_v4p3__7_7_7_u() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_7_u:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -12971,6 +13136,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_u() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_7_u:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -12985,6 +13151,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_u() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_u:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13424,6 +13591,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_7() {
define void @s_shuffle_v4p3_v4p3__u_0_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__u_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13438,6 +13606,7 @@ define void @s_shuffle_v4p3_v4p3__u_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__u_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13452,6 +13621,7 @@ define void @s_shuffle_v4p3_v4p3__u_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__u_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13645,6 +13815,7 @@ define void @s_shuffle_v4p3_v4p3__3_0_0_0() {
define void @s_shuffle_v4p3_v4p3__4_0_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__4_0_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -13659,6 +13830,7 @@ define void @s_shuffle_v4p3_v4p3__4_0_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__4_0_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -13673,6 +13845,7 @@ define void @s_shuffle_v4p3_v4p3__4_0_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__4_0_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -13876,14 +14049,15 @@ define void @s_shuffle_v4p3_v4p3__7_0_0_0() {
define void @s_shuffle_v4p3_v4p3__7_u_0_0() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_u_0_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
@@ -13893,14 +14067,15 @@ define void @s_shuffle_v4p3_v4p3__7_u_0_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_u_0_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
@@ -13910,6 +14085,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_0_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_0_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -14358,15 +14534,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_0() {
define void @s_shuffle_v4p3_v4p3__7_7_u_0() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_0:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -14375,15 +14552,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_0() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_0:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -14392,6 +14570,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_0() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_0:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15070,14 +15249,15 @@ define void @s_shuffle_v4p3_v4p3__7_1_1_1() {
define void @s_shuffle_v4p3_v4p3__7_u_1_1() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_u_1_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
; GFX900-NEXT: s_mov_b32 s10, s5
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -15087,14 +15267,15 @@ define void @s_shuffle_v4p3_v4p3__7_u_1_1() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_u_1_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
; GFX90A-NEXT: s_mov_b32 s10, s5
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -15104,6 +15285,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_1_1() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_1_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -15552,15 +15734,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_1() {
define void @s_shuffle_v4p3_v4p3__7_7_u_1() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_1:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -15569,15 +15752,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_1() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_1:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -15586,6 +15770,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_1() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_1:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -16725,15 +16910,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_2() {
define void @s_shuffle_v4p3_v4p3__7_7_u_2() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_2:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s11
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s15
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s11, s6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
@@ -16742,15 +16928,16 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_2() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_2:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s11
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s15
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s11, s6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
@@ -16759,6 +16946,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_2() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_2:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18358,6 +18546,7 @@ define void @s_shuffle_v4p3_v4p3__0_4_4_4() {
define void @s_shuffle_v4p3_v4p3__1_4_4_4() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__1_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18370,6 +18559,7 @@ define void @s_shuffle_v4p3_v4p3__1_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__1_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18382,6 +18572,7 @@ define void @s_shuffle_v4p3_v4p3__1_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18400,6 +18591,7 @@ define void @s_shuffle_v4p3_v4p3__1_4_4_4() {
define void @s_shuffle_v4p3_v4p3__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__2_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18412,6 +18604,7 @@ define void @s_shuffle_v4p3_v4p3__2_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__2_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18424,6 +18617,7 @@ define void @s_shuffle_v4p3_v4p3__2_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18442,6 +18636,7 @@ define void @s_shuffle_v4p3_v4p3__2_4_4_4() {
define void @s_shuffle_v4p3_v4p3__3_4_4_4() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__3_4_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18454,6 +18649,7 @@ define void @s_shuffle_v4p3_v4p3__3_4_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__3_4_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18466,6 +18662,7 @@ define void @s_shuffle_v4p3_v4p3__3_4_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_4_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -18654,6 +18851,7 @@ define void @s_shuffle_v4p3_v4p3__7_4_4_4() {
define void @s_shuffle_v4p3_v4p3__7_u_4_4() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_u_4_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -18668,6 +18866,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_4_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_u_4_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -18682,6 +18881,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_4_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_4_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19100,6 +19300,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_4() {
define void @s_shuffle_v4p3_v4p3__7_7_u_4() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_4:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19114,6 +19315,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_4() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_4:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19128,6 +19330,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_4() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_4:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -19829,6 +20032,7 @@ define void @s_shuffle_v4p3_v4p3__7_5_5_5() {
define void @s_shuffle_v4p3_v4p3__7_u_5_5() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_u_5_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -19843,6 +20047,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_5_5() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_u_5_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -19857,6 +20062,7 @@ define void @s_shuffle_v4p3_v4p3__7_u_5_5() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_5_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -20275,6 +20481,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_5() {
define void @s_shuffle_v4p3_v4p3__7_7_u_5() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_5:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -20289,6 +20496,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_5() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_5:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -20303,6 +20511,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_5() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_5:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
@@ -21349,6 +21558,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_6() {
define void @s_shuffle_v4p3_v4p3__7_7_u_6() {
; GFX900-LABEL: s_shuffle_v4p3_v4p3__7_7_u_6:
; GFX900: ; %bb.0:
+; GFX900-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
@@ -21363,6 +21573,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_6() {
;
; GFX90A-LABEL: s_shuffle_v4p3_v4p3__7_7_u_6:
; GFX90A: ; %bb.0:
+; GFX90A-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
@@ -21377,6 +21588,7 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_6() {
;
; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_6:
; GFX942: ; %bb.0:
+; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 4621be5cab450..458da8244d3f2 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -21,11 +21,12 @@ define amdgpu_kernel void @foobar(float %a0, float %a1, ptr addrspace(1) %out) #
; CHECK-NEXT: v_mov_b32_e32 v3, s7
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
; CHECK-NEXT: ; %bb.1: ; %ift
-; CHECK-NEXT: s_mov_b32 s4, s5
-; CHECK-NEXT: v_mov_b32_e32 v0, s4
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: v_mov_b32_e32 v2, s6
-; CHECK-NEXT: v_mov_b32_e32 v3, s7
+; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
+; CHECK-NEXT: s_mov_b32 s8, s5
+; CHECK-NEXT: v_mov_b32_e32 v0, s8
+; CHECK-NEXT: v_mov_b32_e32 v1, s9
+; CHECK-NEXT: v_mov_b32_e32 v2, s10
+; CHECK-NEXT: v_mov_b32_e32 v3, s11
; CHECK-NEXT: ; %bb.2: ; %ife
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-select.ll b/llvm/test/CodeGen/AMDGPU/uniform-select.ll
index f001bf0d5e498..6ab1e688a3684 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-select.ll
@@ -8,6 +8,7 @@ define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
; GFX90A-LABEL: test_insert_extract:
; GFX90A: ; %bb.0: ; %entry
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; GFX90A-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX90A-NEXT: s_mov_b32 s2, 0
; GFX90A-NEXT: s_and_b64 vcc, exec, -1
; GFX90A-NEXT: s_mov_b32 s3, 0
@@ -56,6 +57,7 @@ define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
; GFX942-LABEL: test_insert_extract:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX942-NEXT: s_mov_b32 s2, 0
; GFX942-NEXT: s_and_b64 vcc, exec, -1
; GFX942-NEXT: s_mov_b32 s3, 0
@@ -110,6 +112,7 @@ define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
; GFX1030-NEXT: s_mov_b32 s5, 0
; GFX1030-NEXT: s_mov_b32 s6, 0
; GFX1030-NEXT: s_mov_b32 vcc_lo, exec_lo
+; GFX1030-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX1030-NEXT: .p2align 6
; GFX1030-NEXT: .LBB0_1: ; %for.body
; GFX1030-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -158,6 +161,7 @@ define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
; GFX1100-NEXT: s_mov_b32 s5, 0
; GFX1100-NEXT: s_mov_b32 s6, 0
; GFX1100-NEXT: s_mov_b32 vcc_lo, exec_lo
+; GFX1100-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX1100-NEXT: .p2align 6
; GFX1100-NEXT: .LBB0_1: ; %for.body
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-mul.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-mul.ll
index 98919f565d902..4b4e2023dccc6 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-mul.ll
@@ -2475,7 +2475,9 @@ define i64 @test_vector_reduce_mul_v2i64(<2 x i64> %v) {
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v0
; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, v1
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[6:7]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2496,7 +2498,9 @@ define i64 @test_vector_reduce_mul_v2i64(<2 x i64> %v) {
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, v1
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v3, v[6:7]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2534,8 +2538,10 @@ define i64 @test_vector_reduce_mul_v2i64(<2 x i64> %v) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, v0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v4, v3, v[1:2]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v4, v3, v[6:7]
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v5, v2, v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2555,11 +2561,13 @@ define i64 @test_vector_reduce_mul_v2i64(<2 x i64> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v2
; GFX11-GISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v4, v5, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v4, v3, v[1:2]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v6, v5, v[7:8]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v4, v3, v[7:8]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v6, v5, v[9:10]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_mul_v2i64:
@@ -2614,12 +2622,15 @@ define i64 @test_vector_reduce_mul_v3i64(<3 x i64> %v) {
; GFX7-GISEL: ; %bb.0: ; %entry
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v2, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX7-GISEL-NEXT: v_mov_b32_e32 v6, v1
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v8
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v3, v[1:2]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, v8
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v3, v[9:10]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v4, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v2, v[8:9]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v5, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v8, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v5, v[8:9]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v4, v[5:6]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2642,12 +2653,15 @@ define i64 @test_vector_reduce_mul_v3i64(<3 x i64> %v) {
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v2, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, v1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v8
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v3, v[1:2]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, v8
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v3, v[9:10]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v4, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v2, v[8:9]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v5, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v5, v[8:9]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v4, v[5:6]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2694,13 +2708,17 @@ define i64 @test_vector_reduce_mul_v3i64(<3 x i64> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, v0
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
; GFX10-GISEL-NEXT: v_mov_b32_e32 v7, v1
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], s4, v6, v2, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v6, v3, v[9:10]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v10, v9
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v4, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v7, v2, v[9:10]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v8, v5, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v2, v4, v[5:6]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v6, v3, v[10:11]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v10, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v7, v2, v[9:10]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v8, v5, v[10:11]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v1, v4, v[2:3]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_vector_reduce_mul_v3i64:
@@ -2722,15 +2740,20 @@ define i64 @test_vector_reduce_mul_v3i64(<3 x i64> %v) {
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, v1
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v6, v2, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v10, v9
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v6, v3, v[10:11]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v9, v1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v7, v2, v[12:13]
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v6, v3, v[9:10]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v7, v2, v[10:11]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v5, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v11, v4, v[6:7]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v8, v5, v[9:10]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v13, v4, v[6:7]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_mul_v3i64:
@@ -2801,17 +2824,21 @@ define i64 @test_vector_reduce_mul_v4i64(<4 x i64> %v) {
; GFX7-GISEL: ; %bb.0: ; %entry
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v4, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v2, v6, 0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v2, v6, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
; GFX7-GISEL-NEXT: v_mov_b32_e32 v8, v1
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v10
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v0, v5, v[1:2]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v12
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[15:16], s[4:5], v2, v7, v[0:1]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v11, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v6, v[15:16]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v4, v[13:14]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v2, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v11, v[1:2]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v5, v[11:12]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v14
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v2, v7, v[0:1]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v13, 0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v6, v[14:15]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v4, v[10:11]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v2, v[4:5]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v13, v[1:2]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: test_vector_reduce_mul_v4i64:
@@ -2838,17 +2865,21 @@ define i64 @test_vector_reduce_mul_v4i64(<4 x i64> %v) {
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v4, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v2, v6, 0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v2, v6, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr11_vgpr12
; GFX8-GISEL-NEXT: v_mov_b32_e32 v8, v1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v10
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v0, v5, v[1:2]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v12
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[15:16], s[4:5], v2, v7, v[0:1]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v11, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v6, v[15:16]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v4, v[13:14]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v2, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v11, v[1:2]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v11, v10
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v5, v[11:12]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v14
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v2, v7, v[0:1]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v13, 0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v6, v[14:15]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v4, v[10:11]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v2, v[4:5]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v13, v[1:2]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_vector_reduce_mul_v4i64:
@@ -2908,15 +2939,19 @@ define i64 @test_vector_reduce_mul_v4i64(<4 x i64> %v) {
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], s4, v2, v6, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, v0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v9, v1
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], s4, v8, v4, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v11
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], s4, v2, v7, v[0:1]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v13
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[16:17], s4, v2, v7, v[0:1]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v14, v13
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v12, v10, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v8, v5, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v3, v6, v[14:15]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v8, v5, v[14:15]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v3, v6, v[16:17]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, v1
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v9, v4, v[7:8]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v12, v2, v[1:2]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v12, v2, v[5:6]
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v3, v10, v[1:2]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2945,21 +2980,26 @@ define i64 @test_vector_reduce_mul_v4i64(<4 x i64> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v2, v6, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v8, v4, 0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v11
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v2, v7, v[0:1]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v13
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v14, v13
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[16:17], null, v2, v7, v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v12, v10, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[15:16], null, v8, v5, v[2:3]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[17:18], null, v8, v5, v[14:15]
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v3, v6, v[14:15]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v9, v4, v[15:16]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v12, v7, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v5, v10, v[3:4]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v3, v6, v[16:17]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v9, v4, v[17:18]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v12, v7, v[2:3]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v5, v10, v[8:9]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_mul_v4i64:
@@ -3060,30 +3100,39 @@ define i64 @test_vector_reduce_mul_v8i64(<8 x i64> %v) {
; GFX7-GISEL: ; %bb.0: ; %entry
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v4, v12, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr18_vgpr19
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v18, v17
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v13, v[18:19]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v0, v8, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v13, v[17:18]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v12, v[17:18]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v19, v16, 0
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, v13
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v13, v20
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v0, v9, v[13:14]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v4, v[5:6]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v17, v13
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v4, v[17:18]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v17, v20
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v0, v9, v[17:18]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v6, v14, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v8, v[17:18]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v2, v10, 0
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v20
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v15, v[0:1]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v6, v18
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v2, v11, v[6:7]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr20_vgpr21
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v20, v18
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v2, v11, v[20:21]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v14, v[0:1]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v17, v19, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v10, v[20:21]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v7
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v0, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, v7
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v0, v[9:10]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v6, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v19, v[9:10]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v16, v[4:5]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v2, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v2, v[4:5]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v6, v[1:2]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3131,30 +3180,39 @@ define i64 @test_vector_reduce_mul_v8i64(<8 x i64> %v) {
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v4, v12, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr18_vgpr19
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v18, v17
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v13, v[18:19]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v0, v8, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v13, v[17:18]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v12, v[17:18]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v19, v16, 0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, v13
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v13, v20
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v0, v9, v[13:14]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v4, v[5:6]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v17, v13
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v4, v[17:18]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v17, v20
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v0, v9, v[17:18]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v6, v14, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v8, v[17:18]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v2, v10, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v20
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v15, v[0:1]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, v18
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v2, v11, v[6:7]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr20_vgpr21
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v20, v18
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v2, v11, v[20:21]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v14, v[0:1]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v17, v19, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v10, v[20:21]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v7
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v0, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, v7
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v17, v0, v[9:10]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v6, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v19, v[9:10]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v16, v[4:5]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v2, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v2, v[4:5]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v6, v[1:2]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3262,32 +3320,40 @@ define i64 @test_vector_reduce_mul_v8i64(<8 x i64> %v) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[17:18], s4, v0, v8, 0
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[19:20], s4, v2, v10, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[23:24], s4, v6, v14, 0
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr21_vgpr22
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[25:26], s4, v4, v12, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v21, v18
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr27_vgpr28
; GFX10-GISEL-NEXT: v_mov_b32_e32 v16, v1
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[21:22], s4, v6, v14, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[23:24], s4, v4, v12, 0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v18
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v18, v20
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[25:26], s4, v0, v9, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v2, v11, v[18:19]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v22
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v24
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[29:30], s4, v4, v13, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[26:27], s4, v6, v15, v[0:1]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[27:28], s4, v19, v21, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[30:31], s4, v17, v23, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v3, v10, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], s4, v7, v14, v[26:27]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v28
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v5, v12, v[29:30]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, v31
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v19, v6, v[0:1]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], s4, v16, v8, v[25:26]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v30, v27, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v17, v3, v[4:5]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v2, v21, v[5:6]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v6, v23, v[3:4]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v30, v4, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v2, v27, v[3:4]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v27, v20
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[29:30], s4, v19, v23, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[20:21], s4, v0, v9, v[21:22]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[21:22], s4, v2, v11, v[27:28]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v24
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr27_vgpr28
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v27, v26
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v6, v15, v[0:1]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v4, v13, v[27:28]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v3, v10, v[21:22]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[26:27], s4, v17, v25, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, v30
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], s4, v7, v14, v[0:1]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v12, v[1:2]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v26, v29, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v10, v27
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v19, v6, v[3:4]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v16, v8, v[20:21]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v17, v9, v[10:11]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v2, v23, v[3:4]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v4, v25, v[5:6]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v26, v2, v[6:7]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v3, v29, v[1:2]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_vector_reduce_mul_v8i64:
@@ -3333,37 +3399,47 @@ define i64 @test_vector_reduce_mul_v8i64(<8 x i64> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[17:18], null, v0, v8, 0
; GFX11-GISEL-NEXT: v_mad_u64_u32 v[19:20], null, v2, v10, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[21:22], null, v6, v14, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[23:24], null, v4, v12, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v16, v1 :: v_dual_mov_b32 v1, v18
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v18, v20
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[25:26], null, v0, v9, v[1:2]
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v22 :: v_dual_mov_b32 v1, v24
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[26:27], null, v2, v11, v[18:19]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[27:28], null, v6, v15, v[0:1]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[30:31], null, v4, v13, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[28:29], null, v19, v21, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[31:32], null, v17, v23, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[23:24], null, v6, v14, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr21_vgpr22
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v16, v1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[25:26], null, v4, v12, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v21, v18
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr27_vgpr28
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v27, v20
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[29:30], null, v0, v9, v[21:22]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[20:21], null, v2, v11, v[27:28]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v24
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr21_vgpr22
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v21, v26
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v7, v14, v[27:28]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v3, v10, v[26:27]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v5, v12, v[30:31]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, v29
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, v32
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v16, v8, v[25:26]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v19, v0, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v31, v28, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v17, v2, v[3:4]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[26:27], null, v6, v15, v[0:1]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[27:28], null, v19, v23, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v4, v13, v[21:22]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[21:22], null, v17, v25, 0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v6, v21, v[4:5]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v9, v23, v[7:8]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v7, v14, v[26:27]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v3, v10, v[20:21]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v5, v12, v[0:1]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v22
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v19, v1, v[2:3]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v21, v27, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v16, v8, v[29:30]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v17, v9, v[4:5]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v6, v23, v[10:11]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v2, v25, v[7:8]
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v31, v2, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v3, v28, v[4:5]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v21, v3, v[4:5]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v8, v27, v[6:7]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_mul_v8i64:
@@ -3542,63 +3618,82 @@ define i64 @test_vector_reduce_mul_v16i64(<16 x i64> %v) {
; GFX7-GISEL: ; %bb.0: ; %entry
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[31:32], s[4:5], v8, v24, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v8, v25, v[32:33]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v33, v32
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v8, v25, v[33:34]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr34_vgpr35
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v9, v24, v[32:33]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v16, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v24, v31, 0
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, v33
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v24, v8, v[9:10]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v24, v25
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v17, v[24:25]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v34, v33
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v24, v8, v[34:35]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v33, v25
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v17, v[33:34]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v16, v[24:25]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v18, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v31, v[8:9]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v26, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v27, v[1:2]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v16, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v27, v[16:17]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v18, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v26, v[9:10]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v16, v0, 0
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v16, v9, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr26_vgpr27
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v26, v17
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v24, v11
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v16, v9, v[24:25]
; GFX7-GISEL-NEXT: buffer_load_dword v9, off, s[0:3], s32
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v17
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v19, v[1:2]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v19, v[26:27]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v18, v[1:2]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v28, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v1, v0, v[24:25]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v20, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25
; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v29, v[0:1]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v17, v2, 0
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, v18
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v28, v[0:1]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v21, v[3:4]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v12
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v24, v12
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v12, v18
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v21, v[12:13]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v14, v30, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v20, v[3:4]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v0, v[1:2]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v0, v[24:25]
; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v13
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v6, v22, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v2, v[0:1]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, v18
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v9, v[4:5]
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v12, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v15, v30, v[4:5]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v23, v[3:4]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, v18
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v23, v[4:5]
; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, v14
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v17, v1, v[2:3]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v22, v[3:4]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v32, v11, 0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v22, v[4:5]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v13, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v12, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v13, 0
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v32, v0, v[5:6]
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v1, v[0:1]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v32, v11, 0
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v14, v7
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v1, v[14:15]
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, v5
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v32, v0, v[2:3]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v6, 0
; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v16, v13, v[9:10]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v11, v[5:6]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v9, v[1:2]
-; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v11, v[2:3]
+; GFX7-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v9, v[7:8]
+; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v6, v[3:4]
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: test_vector_reduce_mul_v16i64:
@@ -3687,63 +3782,82 @@ define i64 @test_vector_reduce_mul_v16i64(<16 x i64> %v) {
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[31:32], s[4:5], v8, v24, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v8, v25, v[32:33]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v33, v32
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v8, v25, v[33:34]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr34_vgpr35
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v9, v24, v[32:33]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v16, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[32:33], s[4:5], v24, v31, 0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v9, v33
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v24, v8, v[9:10]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v24, v25
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v17, v[24:25]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v34, v33
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v24, v8, v[34:35]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v33, v25
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v0, v17, v[33:34]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v16, v[24:25]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v18, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v31, v[8:9]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v26, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v27, v[1:2]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v16, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v27, v[16:17]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v18, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v26, v[9:10]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v16, v0, 0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v16, v9, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr26_vgpr27
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v26, v17
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v24, v11
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v16, v9, v[24:25]
; GFX8-GISEL-NEXT: buffer_load_dword v9, off, s[0:3], s32
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v17
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v19, v[1:2]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v19, v[26:27]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v18, v[1:2]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v28, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v1, v0, v[24:25]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v4, v20, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v3
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v29, v[0:1]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v17, v2, 0
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, v18
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v28, v[0:1]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v21, v[3:4]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v12
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v24, v12
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v12, v18
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v21, v[12:13]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v14, v30, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v20, v[3:4]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v0, v[1:2]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v17, v0, v[24:25]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v13
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[17:18], s[4:5], v6, v22, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v2, v[0:1]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, v18
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v9, v[4:5]
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v17, v12, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v15, v30, v[4:5]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v23, v[3:4]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, v18
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v23, v[4:5]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, v14
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v17, v1, v[2:3]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v22, v[3:4]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v32, v11, 0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v22, v[4:5]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v13, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v12, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v13, 0
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v32, v0, v[5:6]
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v1, v[0:1]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v32, v11, 0
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, v7
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v10, v1, v[14:15]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, v5
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v32, v0, v[2:3]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v6, 0
; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v16, v13, v[9:10]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v11, v[5:6]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v9, v[1:2]
-; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[3:4]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v11, v[2:3]
+; GFX8-GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, v1
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v9, v[7:8]
+; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, v6, v[3:4]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_vector_reduce_mul_v16i64:
@@ -3951,65 +4065,82 @@ define i64 @test_vector_reduce_mul_v16i64(<16 x i64> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[31:32], s4, v0, v16, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[33:34], s4, v2, v18, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[38:39], s4, v6, v22, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[35:36], s4, v0, v17, v[32:33]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[36:37], s4, v4, v20, 0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v34
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[16:17], s4, v1, v16, v[35:36]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v2, v19, v[0:1]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v37
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v39
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[34:35], s4, v8, v24, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[48:49], s4, v4, v21, v[1:2]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[35:36], s4, v2, v18, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[37:38], s4, v4, v20, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[48:49], s4, v6, v22, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v33, v32
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr51_vgpr52
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr53_vgpr54
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[32:33], s4, v0, v17, v[33:34]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v33, v36
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[16:17], s4, v1, v16, v[32:33]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v2, v19, v[33:34]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr32_vgpr33
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v38
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v32, v49
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[49:50], s4, v10, v26, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v6, v23, v[2:3]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v35
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v5, v20, v[48:49]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[51:52], s4, v8, v25, v[2:3]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v50
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[52:53], s4, v10, v27, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[53:54], s4, v12, v28, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v9, v24, v[51:52]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v54
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], s4, v11, v26, v[52:53]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[19:20], s4, v36, v53, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[54:55], s4, v12, v29, v[2:3]
-; GFX10-GISEL-NEXT: buffer_load_dword v12, off, s[0:3], s32
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[38:39], s4, v8, v24, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v4, v21, v[1:2]
+; GFX10-GISEL-NEXT: buffer_load_dword v21, off, s[0:3], s32
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[32:33], s4, v6, v23, v[32:33]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v51, v50
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v33, v39
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v3, v18, v[0:1]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[50:51], s4, v10, v27, v[51:52]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[51:52], s4, v12, v28, 0
; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v14, v30, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[17:18], s4, v33, v49, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], s4, v7, v22, v[1:2]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v31, v34, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v5, v20, v[1:2]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[33:34], s4, v8, v25, v[33:34]
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v53, v52
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[19:20], s4, v35, v49, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, v4
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr17_vgpr18
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[52:53], s4, v12, v29, v[53:54]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v7, v22, v[32:33]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], s4, v9, v24, v[33:34]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], s4, v11, v26, v[50:51]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v31, v38, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], s4, v13, v28, v[52:53]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr24_vgpr25
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v17, v10
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[17:18], s4, v31, v8, v[17:18]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v14, v12, v[0:1]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], s4, v38, v3, 0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v8
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], s4, v13, v28, v[54:55]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], s4, v15, v30, v[0:1]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v18
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], s4, v31, v9, v[1:2]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v12
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v33, v10, v[0:1]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v20
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], s4, v38, v14, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[20:21], s4, v17, v11, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v36, v13, v[0:1]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[12:13], s4, v7, v19, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v6, v3, v[14:15]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v14, v21, v[5:6]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v48, v3, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], s4, v37, v51, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[21:22], s4, v15, v30, v[4:5]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr22_vgpr23
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v24, v6
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[10:11], s4, v35, v11, v[22:23]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr22_vgpr23
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v22, v14
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[14:15], s4, v48, v21, v[24:25]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[20:21], s4, v19, v5, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[11:12], s4, v37, v12, v[22:23]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[22:23], s4, v9, v13, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v7, v3, v[14:15]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], s4, v2, v49, v[10:11]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v0, v51, v[11:12]
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v21
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[9:10], s4, v2, v49, v[9:10]
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, v13
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v5, v53, v[0:1]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[5:6], s4, v17, v3, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v12, v20, 0
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v7, v4, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[13:14], s4, v16, v34, v[8:9]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v9, v11, v[5:6]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v13, v19, v[2:3]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v12, v3, v[1:2]
-; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v4, v20, v[1:2]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v10, v23
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v19, v3, v[1:2]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], s4, v16, v38, v[17:18]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], s4, v22, v20, 0
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[7:8], s4, v9, v7, v[10:11]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], s4, v6, v5, v[2:3]
+; GFX10-GISEL-NEXT: ; implicit-def: $vgpr5_vgpr6
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v3, v13, v[7:8]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], s4, v22, v4, v[5:6]
+; GFX10-GISEL-NEXT: v_mad_u64_u32 v[1:2], s4, v1, v20, v[2:3]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_vector_reduce_mul_v16i64:
@@ -4090,66 +4221,90 @@ define i64 @test_vector_reduce_mul_v16i64(<16 x i64> %v) {
; GFX11-GISEL-LABEL: test_vector_reduce_mul_v16i64:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: scratch_load_b32 v71, off, s32
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[31:32], null, v0, v16, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[33:34], null, v2, v18, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[35:36], null, v4, v20, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[37:38], null, v6, v22, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[50:51], null, v10, v26, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[52:53], null, v12, v28, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[48:49], null, v8, v24, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[54:55], null, v14, v30, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[82:83], null, v0, v17, v[32:33]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[83:84], null, v2, v19, v[34:35]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[84:85], null, v4, v21, v[36:37]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[85:86], null, v6, v23, v[38:39]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[86:87], null, v10, v27, v[51:52]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[65:66], null, v31, v48, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[38:39], null, v8, v25, v[49:50]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v64, v55
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[96:97], null, v12, v29, v[53:54]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[97:98], null, v1, v16, v[82:83]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v3, v18, v[83:84]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v5, v20, v[84:85]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v7, v22, v[85:86]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[67:68], null, v33, v50, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[80:81], null, v37, v54, 0
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v66
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[69:70], null, v35, v52, 0
+; GFX11-GISEL-NEXT: scratch_load_b32 v39, off, s32
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[68:69], null, v0, v16, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[70:71], null, v2, v18, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[80:81], null, v4, v20, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[82:83], null, v6, v22, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[84:85], null, v8, v24, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[86:87], null, v10, v26, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[98:99], null, v14, v30, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr31_vgpr32
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr33_vgpr34
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[96:97], null, v12, v28, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr35_vgpr36
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v31, v69
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v33, v71
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr37_vgpr38
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v35, v81
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr48_vgpr49
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v37, v83 :: v_dual_mov_b32 v48, v85
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr50_vgpr51
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr54_vgpr55
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr52_vgpr53
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[101:102], null, v70, v86, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v50, v87
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[116:117], null, v0, v17, v[31:32]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[31:32], null, v2, v19, v[33:34]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v54, v99
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[32:33], null, v4, v21, v[35:36]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v52, v97
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[33:34], null, v6, v23, v[37:38]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[34:35], null, v8, v25, v[48:49]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[35:36], null, v10, v27, v[50:51]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[36:37], null, v12, v29, v[52:53]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[99:100], null, v68, v84, 0
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[37:38], null, v1, v16, v[116:117]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v3, v18, v[31:32]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v5, v20, v[32:33]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v7, v22, v[33:34]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr64_vgpr65
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[114:115], null, v82, v98, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr66_vgpr67
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[112:113], null, v80, v96, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v64, v100
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v66, v102
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v14, v71, v[64:65]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v9, v24, v[38:39]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v11, v26, v[86:87]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v13, v28, v[96:97]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v15, v30, v[4:5]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v68
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v31, v5, v[0:1]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v81
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v33, v6, v[4:5]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, v70
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v37, v8, v[0:1]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v67, v80, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v35, v7, v[4:5]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v65, v69, 0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, v12
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[14:15], null, v3, v54, v[5:6]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v1, v50, v[10:11]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v2, v52, v[13:14]
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, v7
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v67, v14, v[0:1]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v97, v48, v[9:10]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v6, v11, 0
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v65, v4, v[2:3]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v3, v80, v[7:8]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v12, v69, v[8:9]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v6, v4, v[1:2]
-; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v9, v11, v[7:8]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v14, v39, v[54:55]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v9, v24, v[34:35]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v11, v26, v[35:36]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v13, v28, v[36:37]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v15, v30, v[3:4]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v68, v4, v[64:65]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr3_vgpr4
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v70, v5, v[66:67]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, v115
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v10, v113
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v82, v7, v[3:4]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v80, v6, v[10:11]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v101, v114, 0
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[5:6], null, v99, v112, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[10:11], null, v2, v98, v[12:13]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v14, v4
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[16:17], null, v1, v96, v[13:14]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[11:12], null, v0, v86, v[9:10]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr12_vgpr13
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v5, v3, 0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v12, v6
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v101, v10, v[14:15]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[9:10], null, v37, v84, v[8:9]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[7:8], null, v99, v16, v[12:13]
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[12:13], null, v11, v114, v[6:7]
+; GFX11-GISEL-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[13:14], null, v9, v112, v[7:8]
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v10, v1
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v5, v12, v[10:11]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u64_u32 v[1:2], null, v13, v3, v[6:7]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: test_vector_reduce_mul_v16i64:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
index b5d9d00c48045..53785250b0f98 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
@@ -64,9 +64,14 @@ define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v2i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -91,13 +96,18 @@ define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v2i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -207,14 +217,20 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v3i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -245,18 +261,24 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v3i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -446,19 +468,30 @@ define i8 @test_vector_reduce_smax_v4i8(<4 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v4i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v1
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -516,23 +549,34 @@ define i8 @test_vector_reduce_smax_v4i8(<4 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v4i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v1
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -795,36 +839,52 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v10, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
@@ -901,40 +961,56 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
+; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v10, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v2
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
@@ -1306,55 +1382,86 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr16
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr20
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr19
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v16.l, v12.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr12
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v10.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v12.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v20.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v16, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v19.l, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v11, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v20, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v17.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v16.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v18, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v9.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v10, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v19, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -1462,59 +1569,90 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr16
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr20
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr19
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v16.l, v12.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr12
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v10.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v12.l, v4.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v20.l, v6.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v16, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v19.l, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v11, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v20, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v17.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v16.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v18, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v9.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v10, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v19, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
-; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
index 2a989ecd2ebad..4eccee8e816e6 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
@@ -64,9 +64,14 @@ define i8 @test_vector_reduce_smin_v2i8(<2 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v2i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -91,13 +96,18 @@ define i8 @test_vector_reduce_smin_v2i8(<2 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v2i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -207,14 +217,20 @@ define i8 @test_vector_reduce_smin_v3i8(<3 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v3i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -245,18 +261,24 @@ define i8 @test_vector_reduce_smin_v3i8(<3 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v3i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -446,19 +468,30 @@ define i8 @test_vector_reduce_smin_v4i8(<4 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v4i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v1
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -516,23 +549,34 @@ define i8 @test_vector_reduce_smin_v4i8(<4 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v4i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr3
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v1
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -795,36 +839,52 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v10, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v2
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
@@ -901,40 +961,56 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr7
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
+; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v10, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v9, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v2
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
@@ -1306,55 +1382,86 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
;
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr16
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr20
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr19
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v16.l, v12.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr12
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v10.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v12.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v20.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v16, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v19.l, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v4, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
+; GFX11-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v11, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v20, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v17.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v16.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v8.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v18, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v9.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v10, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
-; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v19, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
-; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -1462,59 +1569,90 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr16
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr18
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr20
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr19
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v16.l, v12.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr12
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v10.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v12.l, v4.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr10
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v20.l, v6.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v16, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v10.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr2
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr4
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v19.l, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr9
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v4, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr5
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v1, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
+; GFX12-SDAG-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v11, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v20, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v17.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v16.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v8.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v18, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v9.l
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v10, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
-; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v19, 0, 8
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
-; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
-; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
-; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
-; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
+; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll b/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll
index 07e9325095017..3018cd3d20d7f 100644
--- a/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll
@@ -454,6 +454,7 @@ define <2 x i16> @shuffle_v2i16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -498,6 +499,7 @@ define <4 x i16> @shuffle_v4i16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -549,6 +551,7 @@ define <8 x i16> @shuffle_v8i16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -612,6 +615,7 @@ define <16 x i16> @shuffle_v16i16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -699,6 +703,7 @@ define <32 x i16> @shuffle_v32i16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
@@ -1119,6 +1124,7 @@ define <3 x bfloat> @shuffle_v3bf16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
@@ -1518,6 +1524,7 @@ define <3 x half> @shuffle_v3f16_rebroadcast(ptr addrspace(1) %arg0) {
; GFX11-TRUE16: ; %bb.0: ; %entry
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
index b01e92d6979a3..5874b667ce5e2 100644
--- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
@@ -209,10 +209,11 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-TRUE16-LABEL: shuffle_v4f16_3u6u:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off offset:4
+; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b32 v1, v[2:3], off offset:4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -268,10 +269,11 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-TRUE16-LABEL: shuffle_v4f16_3uu7:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off offset:4
+; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b32 v1, v[2:3], off offset:4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -389,9 +391,12 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b64 v[0:1], v[2:3], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: shuffle_v4f16_357u:
@@ -2628,6 +2633,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v2, v[2:3], off
; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
@@ -3505,10 +3511,11 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-TRUE16-LABEL: shuffle_v4bf16_3u6u:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off offset:4
+; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b32 v1, v[2:3], off offset:4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -3564,10 +3571,11 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-TRUE16-LABEL: shuffle_v4bf16_3uu7:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: global_load_b32 v0, v[0:1], off offset:4
+; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b32 v1, v[2:3], off offset:4
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -3685,9 +3693,12 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_b32 v4, v[0:1], off offset:4
; GFX11-TRUE16-NEXT: global_load_b64 v[0:1], v[2:3], off
+; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: shuffle_v4bf16_357u:
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index 53bede84513c9..c9830010b8056 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -237,11 +237,20 @@ bb:
}
define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negA:
-; GFX12: ; %bb.0: ; %bb
-; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT: global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT: s_endpgm
+; GFX12-TRUE16-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GFX12-TRUE16: ; %bb.0: ; %bb
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr13
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v13.l, v10.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v13 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX12-TRUE16-NEXT: global_store_b128 v[11:12], v[6:9], off
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GFX12-FAKE16: ; %bb.0: ; %bb
+; GFX12-FAKE16-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX12-FAKE16-NEXT: global_store_b128 v[11:12], v[6:9], off
+; GFX12-FAKE16-NEXT: s_endpgm
bb:
%fneg.A = fneg <4 x half> %A
%res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x float> %C, i16 %Index)
@@ -250,11 +259,20 @@ bb:
}
define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negB:
-; GFX12: ; %bb.0: ; %bb
-; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT: global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT: s_endpgm
+; GFX12-TRUE16-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GFX12-TRUE16: ; %bb.0: ; %bb
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr13
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v13.l, v10.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v13 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX12-TRUE16-NEXT: global_store_b128 v[11:12], v[6:9], off
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GFX12-FAKE16: ; %bb.0: ; %bb
+; GFX12-FAKE16-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX12-FAKE16-NEXT: global_store_b128 v[11:12], v[6:9], off
+; GFX12-FAKE16-NEXT: s_endpgm
bb:
%fneg.B = fneg <8 x half> %B
%res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x float> %C, i16 %Index)
@@ -263,11 +281,20 @@ bb:
}
define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negA:
-; GFX12: ; %bb.0: ; %bb
-; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT: global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT: s_endpgm
+; GFX12-TRUE16-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GFX12-TRUE16: ; %bb.0: ; %bb
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v11.l, v8.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v11 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX12-TRUE16-NEXT: global_store_b64 v[9:10], v[6:7], off
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GFX12-FAKE16: ; %bb.0: ; %bb
+; GFX12-FAKE16-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX12-FAKE16-NEXT: global_store_b64 v[9:10], v[6:7], off
+; GFX12-FAKE16-NEXT: s_endpgm
bb:
%fneg.A = fneg <4 x half> %A
%res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x half> %C, i16 %Index)
@@ -276,11 +303,20 @@ bb:
}
define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negB:
-; GFX12: ; %bb.0: ; %bb
-; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT: global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT: s_endpgm
+; GFX12-TRUE16-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GFX12-TRUE16: ; %bb.0: ; %bb
+; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr11
+; GFX12-TRUE16-NEXT: v_mov_b16_e32 v11.l, v8.l
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v11 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX12-TRUE16-NEXT: global_store_b64 v[9:10], v[6:7], off
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GFX12-FAKE16: ; %bb.0: ; %bb
+; GFX12-FAKE16-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX12-FAKE16-NEXT: global_store_b64 v[9:10], v[6:7], off
+; GFX12-FAKE16-NEXT: s_endpgm
bb:
%fneg.B = fneg <8 x half> %B
%res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x half> %C, i16 %Index)
More information about the llvm-commits
mailing list